mirror of
https://github.com/zeek/zeek.git
synced 2025-10-04 15:48:19 +00:00
Merge remote-tracking branch 'origin/topic/seth/files-reassembly-and-mime-updates' into topic/jsiwek/file-reassembly-merge
Conflicts: testing/btest/Baseline/scripts.policy.misc.dump-events/all-events.log
This commit is contained in:
commit
edaf7edc11
63 changed files with 1108 additions and 1081 deletions
|
@ -72,10 +72,10 @@ bool AnalyzerSet::Add(file_analysis::Tag tag, RecordVal* args)
|
|||
return true;
|
||||
}
|
||||
|
||||
bool AnalyzerSet::QueueAdd(file_analysis::Tag tag, RecordVal* args)
|
||||
bool AnalyzerSet::QueueAdd(file_analysis::Tag tag, RecordVal* args, file_analysis::Analyzer* a)
|
||||
{
|
||||
HashKey* key = GetKey(tag, args);
|
||||
file_analysis::Analyzer* a = InstantiateAnalyzer(tag, args);
|
||||
a = InstantiateAnalyzer(tag, args);
|
||||
|
||||
if ( ! a )
|
||||
{
|
||||
|
|
|
@ -57,9 +57,10 @@ public:
|
|||
* Queue the attachment of an analyzer to #file.
|
||||
* @param tag the analyzer tag of the file analyzer to add.
|
||||
* @param args an \c AnalyzerArgs value which specifies an analyzer.
|
||||
* @param a an analyzer pointer to return the instantiated analyzer to the caller.
|
||||
* @return true if analyzer was able to be instantiated, else false.
|
||||
*/
|
||||
bool QueueAdd(file_analysis::Tag tag, RecordVal* args);
|
||||
bool QueueAdd(file_analysis::Tag tag, RecordVal* args, file_analysis::Analyzer* a);
|
||||
|
||||
/**
|
||||
* Remove an analyzer from #file immediately.
|
||||
|
|
|
@ -11,6 +11,7 @@ set(file_analysis_SRCS
|
|||
Manager.cc
|
||||
File.cc
|
||||
FileTimer.cc
|
||||
FileReassembler.cc
|
||||
Analyzer.cc
|
||||
AnalyzerSet.cc
|
||||
Component.cc
|
||||
|
|
|
@ -53,8 +53,6 @@ int File::overflow_bytes_idx = -1;
|
|||
int File::timeout_interval_idx = -1;
|
||||
int File::bof_buffer_size_idx = -1;
|
||||
int File::bof_buffer_idx = -1;
|
||||
int File::mime_type_idx = -1;
|
||||
int File::mime_types_idx = -1;
|
||||
|
||||
void File::StaticInit()
|
||||
{
|
||||
|
@ -74,15 +72,14 @@ void File::StaticInit()
|
|||
timeout_interval_idx = Idx("timeout_interval");
|
||||
bof_buffer_size_idx = Idx("bof_buffer_size");
|
||||
bof_buffer_idx = Idx("bof_buffer");
|
||||
mime_type_idx = Idx("mime_type");
|
||||
mime_types_idx = Idx("mime_types");
|
||||
}
|
||||
|
||||
File::File(const string& file_id, Connection* conn, analyzer::Tag tag,
|
||||
bool is_orig)
|
||||
: id(file_id), val(0), postpone_timeout(false), first_chunk(true),
|
||||
missed_bof(false), need_reassembly(false), done(false),
|
||||
did_file_new_event(false), analyzers(this)
|
||||
: id(file_id), val(0), file_reassembler(0), stream_offset(0),
|
||||
reassembly_max_buffer(0), did_mime_type(false),
|
||||
reassembly_enabled(false), postpone_timeout(false), done(false),
|
||||
analyzers(this)
|
||||
{
|
||||
StaticInit();
|
||||
|
||||
|
@ -96,7 +93,6 @@ File::File(const string& file_id, Connection* conn, analyzer::Tag tag,
|
|||
// add source, connection, is_orig fields
|
||||
SetSource(analyzer_mgr->GetComponentName(tag));
|
||||
val->Assign(is_orig_idx, new Val(is_orig, TYPE_BOOL));
|
||||
UpdateConnectionFields(conn, is_orig);
|
||||
}
|
||||
|
||||
UpdateLastActivityTime();
|
||||
|
@ -107,10 +103,9 @@ File::~File()
|
|||
DBG_LOG(DBG_FILE_ANALYSIS, "[%s] Destroying File object", id.c_str());
|
||||
Unref(val);
|
||||
|
||||
while ( ! fonc_queue.empty() )
|
||||
if ( file_reassembler )
|
||||
{
|
||||
delete_vals(fonc_queue.front().second);
|
||||
fonc_queue.pop();
|
||||
delete file_reassembler;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -150,11 +145,7 @@ void File::UpdateConnectionFields(Connection* conn, bool is_orig)
|
|||
vl->append(conn_val->Ref());
|
||||
vl->append(new Val(is_orig, TYPE_BOOL));
|
||||
|
||||
if ( did_file_new_event )
|
||||
FileEvent(file_over_new_connection, vl);
|
||||
else
|
||||
fonc_queue.push(pair<EventHandlerPtr, val_list*>(
|
||||
file_over_new_connection, vl));
|
||||
FileEvent(file_over_new_connection, vl);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -242,7 +233,7 @@ bool File::IsComplete() const
|
|||
if ( ! total )
|
||||
return false;
|
||||
|
||||
if ( LookupFieldDefaultCount(seen_bytes_idx) >= total->AsCount() )
|
||||
if ( stream_offset >= total->AsCount() )
|
||||
return true;
|
||||
|
||||
return false;
|
||||
|
@ -258,7 +249,23 @@ bool File::AddAnalyzer(file_analysis::Tag tag, RecordVal* args)
|
|||
DBG_LOG(DBG_FILE_ANALYSIS, "[%s] Queuing addition of %s analyzer",
|
||||
id.c_str(), file_mgr->GetComponentName(tag).c_str());
|
||||
|
||||
return done ? false : analyzers.QueueAdd(tag, args);
|
||||
if ( done )
|
||||
return false;
|
||||
|
||||
file_analysis::Analyzer *a = 0;
|
||||
bool success = analyzers.QueueAdd(tag, args, a);
|
||||
if ( success && a )
|
||||
{
|
||||
// Catch up this analyzer with the BOF buffer
|
||||
for ( size_t i = 0; i < bof_buffer.chunks.size(); ++i )
|
||||
{
|
||||
if ( ! a->DeliverStream(bof_buffer.chunks[i]->Bytes(), bof_buffer.chunks[i]->Len()) )
|
||||
{
|
||||
analyzers.QueueRemove(a->Tag(), a->Args());
|
||||
}
|
||||
}
|
||||
}
|
||||
return success;
|
||||
}
|
||||
|
||||
bool File::RemoveAnalyzer(file_analysis::Tag tag, RecordVal* args)
|
||||
|
@ -269,9 +276,61 @@ bool File::RemoveAnalyzer(file_analysis::Tag tag, RecordVal* args)
|
|||
return done ? false : analyzers.QueueRemove(tag, args);
|
||||
}
|
||||
|
||||
void File::EnableReassembly()
|
||||
{
|
||||
reassembly_enabled = true;
|
||||
}
|
||||
|
||||
void File::DisableReassembly()
|
||||
{
|
||||
reassembly_enabled = false;
|
||||
if ( file_reassembler )
|
||||
{
|
||||
delete file_reassembler;
|
||||
file_reassembler = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Records the maximum reassembly buffer size for this file.
 * @param max the new limit, stored as-is in reassembly_max_buffer.
 */
void File::SetReassemblyBuffer(uint64 max)
	{
	reassembly_max_buffer = max;
	}
|
||||
|
||||
bool File::DetectMIME()
|
||||
{
|
||||
RuleMatcher::MIME_Matches matches;
|
||||
|
||||
BroString *bs = concatenate(bof_buffer.chunks);
|
||||
const u_char* data = bs->Bytes();
|
||||
uint64 len = bs->Len();
|
||||
len = min(len, LookupFieldDefaultCount(bof_buffer_size_idx));
|
||||
file_mgr->DetectMIME(data, len, &matches);
|
||||
|
||||
if ( matches.empty() )
|
||||
return false;
|
||||
|
||||
if ( FileEventAvailable(file_mime_type) )
|
||||
{
|
||||
val_list* vl = new val_list();
|
||||
vl->append(val->Ref());
|
||||
vl->append(new StringVal(*(matches.begin()->second.begin())));
|
||||
FileEvent(file_mime_type, vl);
|
||||
}
|
||||
|
||||
if ( FileEventAvailable(file_mime_types) )
|
||||
{
|
||||
val_list* vl = new val_list();
|
||||
vl->append(val->Ref());
|
||||
vl->append(file_analysis::GenMIMEMatchesVal(matches));
|
||||
FileEvent(file_mime_types, vl);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool File::BufferBOF(const u_char* data, uint64 len)
|
||||
{
|
||||
if ( bof_buffer.full || bof_buffer.replayed )
|
||||
if ( bof_buffer.full )
|
||||
return false;
|
||||
|
||||
uint64 desired_size = LookupFieldDefaultCount(bof_buffer_size_idx);
|
||||
|
@ -282,101 +341,91 @@ bool File::BufferBOF(const u_char* data, uint64 len)
|
|||
if ( bof_buffer.size >= desired_size )
|
||||
{
|
||||
bof_buffer.full = true;
|
||||
ReplayBOF();
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool File::DetectMIME(const u_char* data, uint64 len)
|
||||
void File::DeliverStream(const u_char* data, uint64 len)
|
||||
{
|
||||
RuleMatcher::MIME_Matches matches;
|
||||
len = min(len, LookupFieldDefaultCount(bof_buffer_size_idx));
|
||||
file_mgr->DetectMIME(data, len, &matches);
|
||||
// Buffer enough data for the BOF buffer
|
||||
BufferBOF(data, len);
|
||||
|
||||
if ( matches.empty() )
|
||||
return false;
|
||||
|
||||
val->Assign(mime_type_idx,
|
||||
new StringVal(*(matches.begin()->second.begin())));
|
||||
val->Assign(mime_types_idx, file_analysis::GenMIMEMatchesVal(matches));
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void File::ReplayBOF()
|
||||
{
|
||||
if ( bof_buffer.replayed )
|
||||
return;
|
||||
|
||||
bof_buffer.replayed = true;
|
||||
|
||||
if ( bof_buffer.chunks.empty() )
|
||||
// TODO: mime matching size needs defined.
|
||||
if ( ! did_mime_type &&
|
||||
bof_buffer.size >= 1024 &&
|
||||
LookupFieldDefaultCount(missing_bytes_idx) == 0 )
|
||||
{
|
||||
// Since we missed the beginning, try file type detect on next data in.
|
||||
missed_bof = true;
|
||||
return;
|
||||
did_mime_type = true;
|
||||
DetectMIME();
|
||||
|
||||
// TODO: this needs to be done elsewhere. For now it's here.
|
||||
BroString* bs = concatenate(bof_buffer.chunks);
|
||||
val->Assign(bof_buffer_idx, new StringVal(bs));
|
||||
}
|
||||
|
||||
BroString* bs = concatenate(bof_buffer.chunks);
|
||||
val->Assign(bof_buffer_idx, new StringVal(bs));
|
||||
|
||||
DetectMIME(bs->Bytes(), bs->Len());
|
||||
FileEvent(file_new);
|
||||
|
||||
for ( size_t i = 0; i < bof_buffer.chunks.size(); ++i )
|
||||
DataIn(bof_buffer.chunks[i]->Bytes(), bof_buffer.chunks[i]->Len());
|
||||
}
|
||||
|
||||
void File::DataIn(const u_char* data, uint64 len, uint64 offset)
|
||||
{
|
||||
analyzers.DrainModifications();
|
||||
|
||||
if ( first_chunk )
|
||||
{
|
||||
// TODO: this should all really be delayed until we attempt reassembly
|
||||
DetectMIME(data, len);
|
||||
FileEvent(file_new);
|
||||
first_chunk = false;
|
||||
}
|
||||
|
||||
DBG_LOG(DBG_FILE_ANALYSIS, "[%s] %" PRIu64 " bytes in at offset" PRIu64 "; %s [%s]",
|
||||
id.c_str(), len, offset,
|
||||
DBG_LOG(DBG_FILE_ANALYSIS, "[%s] %" PRIu64 " bytes in at offset %" PRIu64 "; %s [%s]",
|
||||
id.c_str(), len, stream_offset,
|
||||
IsComplete() ? "complete" : "incomplete",
|
||||
fmt_bytes((const char*) data, min((uint64)40, len)), len > 40 ? "..." : "");
|
||||
|
||||
file_analysis::Analyzer* a = 0;
|
||||
IterCookie* c = analyzers.InitForIteration();
|
||||
|
||||
while ( (a = analyzers.NextEntry(c)) )
|
||||
{
|
||||
if ( ! a->DeliverChunk(data, len, offset) )
|
||||
if ( !a->DeliverStream(data, len) )
|
||||
{
|
||||
analyzers.QueueRemove(a->Tag(), a->Args());
|
||||
}
|
||||
}
|
||||
|
||||
analyzers.DrainModifications();
|
||||
|
||||
// TODO: check reassembly requirement based on buffer size in record
|
||||
if ( need_reassembly )
|
||||
reporter->InternalError("file_analyzer::File TODO: reassembly not yet supported");
|
||||
|
||||
// TODO: reassembly overflow stuff, increment overflow count, eval trigger
|
||||
|
||||
stream_offset += len;
|
||||
IncrementByteCount(len, seen_bytes_idx);
|
||||
}
|
||||
|
||||
void File::DataIn(const u_char* data, uint64 len)
|
||||
void File::DeliverChunk(const u_char* data, uint64 len, uint64 offset)
|
||||
{
|
||||
analyzers.DrainModifications();
|
||||
|
||||
if ( BufferBOF(data, len) )
|
||||
return;
|
||||
|
||||
if ( missed_bof )
|
||||
// Potentially handle reassembly and deliver to the stream analyzers.
|
||||
if ( file_reassembler )
|
||||
{
|
||||
DetectMIME(data, len);
|
||||
FileEvent(file_new);
|
||||
missed_bof = false;
|
||||
if ( reassembly_max_buffer > 0 &&
|
||||
reassembly_max_buffer < file_reassembler->TotalSize() )
|
||||
{
|
||||
uint64 first_offset = file_reassembler->GetFirstBlockOffset();
|
||||
int gap_bytes = file_reassembler->TrimToSeq(first_offset);
|
||||
|
||||
if ( FileEventAvailable(file_reassembly_overflow) )
|
||||
{
|
||||
val_list* vl = new val_list();
|
||||
vl->append(val->Ref());
|
||||
vl->append(new Val(stream_offset, TYPE_COUNT));
|
||||
vl->append(new Val(gap_bytes, TYPE_COUNT));
|
||||
FileEvent(file_reassembly_overflow, vl);
|
||||
}
|
||||
|
||||
Gap(stream_offset, gap_bytes);
|
||||
}
|
||||
|
||||
// Forward data to the reassembler.
|
||||
file_reassembler->NewBlock(network_time, offset, len, data);
|
||||
}
|
||||
else if ( stream_offset == offset )
|
||||
{
|
||||
// This is the normal case where a file is transferred linearly.
|
||||
// Nothing special should be done here.
|
||||
DeliverStream(data, len);
|
||||
}
|
||||
else if ( reassembly_enabled )
|
||||
{
|
||||
// This is data that doesn't match the offset and the reassembler
|
||||
// needs to be enabled.
|
||||
file_reassembler = new FileReassembler(this, stream_offset);
|
||||
file_reassembler->NewBlock(network_time, offset, len, data);
|
||||
}
|
||||
else
|
||||
{
|
||||
// We can't reassemble so we throw out the data for streaming.
|
||||
IncrementByteCount(len, overflow_bytes_idx);
|
||||
}
|
||||
|
||||
DBG_LOG(DBG_FILE_ANALYSIS, "[%s] %" PRIu64 " bytes in; %s [%s]",
|
||||
|
@ -386,24 +435,37 @@ void File::DataIn(const u_char* data, uint64 len)
|
|||
|
||||
file_analysis::Analyzer* a = 0;
|
||||
IterCookie* c = analyzers.InitForIteration();
|
||||
|
||||
while ( (a = analyzers.NextEntry(c)) )
|
||||
{
|
||||
if ( ! a->DeliverStream(data, len) )
|
||||
if ( !a->DeliverChunk(data, len, offset) )
|
||||
{
|
||||
analyzers.QueueRemove(a->Tag(), a->Args());
|
||||
continue;
|
||||
}
|
||||
|
||||
uint64 offset = LookupFieldDefaultCount(seen_bytes_idx) +
|
||||
LookupFieldDefaultCount(missing_bytes_idx);
|
||||
|
||||
if ( ! a->DeliverChunk(data, len, offset) )
|
||||
analyzers.QueueRemove(a->Tag(), a->Args());
|
||||
}
|
||||
|
||||
if ( IsComplete() )
|
||||
{
|
||||
// If the file is complete we can automatically go and close out the file from here.
|
||||
EndOfFile();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void File::DataIn(const u_char* data, uint64 len, uint64 offset)
|
||||
{
|
||||
analyzers.DrainModifications();
|
||||
DeliverChunk(data, len, offset);
|
||||
analyzers.DrainModifications();
|
||||
}
|
||||
|
||||
void File::DataIn(const u_char* data, uint64 len)
|
||||
{
|
||||
analyzers.DrainModifications();
|
||||
|
||||
uint64 offset = LookupFieldDefaultCount(seen_bytes_idx) +
|
||||
LookupFieldDefaultCount(missing_bytes_idx);
|
||||
DeliverChunk(data, len, offset);
|
||||
analyzers.DrainModifications();
|
||||
IncrementByteCount(len, seen_bytes_idx);
|
||||
}
|
||||
|
||||
void File::EndOfFile()
|
||||
|
@ -413,10 +475,19 @@ void File::EndOfFile()
|
|||
if ( done )
|
||||
return;
|
||||
|
||||
analyzers.DrainModifications();
|
||||
if ( ! did_mime_type )
|
||||
{
|
||||
DetectMIME();
|
||||
|
||||
// Send along anything that's been buffered, but never flushed.
|
||||
ReplayBOF();
|
||||
// TODO: this also needs to be done elsewhere.
|
||||
if ( bof_buffer.size > 0 )
|
||||
{
|
||||
BroString* bs = concatenate(bof_buffer.chunks);
|
||||
val->Assign(bof_buffer_idx, new StringVal(bs));
|
||||
}
|
||||
}
|
||||
|
||||
analyzers.DrainModifications();
|
||||
|
||||
done = true;
|
||||
|
||||
|
@ -441,10 +512,6 @@ void File::Gap(uint64 offset, uint64 len)
|
|||
|
||||
analyzers.DrainModifications();
|
||||
|
||||
// If we were buffering the beginning of the file, a gap means we've got
|
||||
// as much contiguous stuff at the beginning as possible, so work with that.
|
||||
ReplayBOF();
|
||||
|
||||
file_analysis::Analyzer* a = 0;
|
||||
IterCookie* c = analyzers.InitForIteration();
|
||||
|
||||
|
@ -464,6 +531,8 @@ void File::Gap(uint64 offset, uint64 len)
|
|||
}
|
||||
|
||||
analyzers.DrainModifications();
|
||||
|
||||
stream_offset += len;
|
||||
IncrementByteCount(len, missing_bytes_idx);
|
||||
}
|
||||
|
||||
|
@ -482,30 +551,13 @@ void File::FileEvent(EventHandlerPtr h)
|
|||
FileEvent(h, vl);
|
||||
}
|
||||
|
||||
static void flush_file_event_queue(queue<pair<EventHandlerPtr, val_list*> >& q)
|
||||
{
|
||||
while ( ! q.empty() )
|
||||
{
|
||||
pair<EventHandlerPtr, val_list*> p = q.front();
|
||||
mgr.QueueEvent(p.first, p.second);
|
||||
q.pop();
|
||||
}
|
||||
}
|
||||
|
||||
void File::FileEvent(EventHandlerPtr h, val_list* vl)
|
||||
{
|
||||
if ( h == file_state_remove )
|
||||
flush_file_event_queue(fonc_queue);
|
||||
|
||||
mgr.QueueEvent(h, vl);
|
||||
|
||||
if ( h == file_new )
|
||||
{
|
||||
did_file_new_event = true;
|
||||
flush_file_event_queue(fonc_queue);
|
||||
}
|
||||
|
||||
if ( h == file_new || h == file_timeout || h == file_extraction_limit )
|
||||
if ( h == file_new || h == file_over_new_connection ||
|
||||
h == file_mime_type ||
|
||||
h == file_timeout || h == file_extraction_limit )
|
||||
{
|
||||
// immediate feedback is required for these events.
|
||||
mgr.Drain();
|
||||
|
|
|
@ -8,6 +8,7 @@
|
|||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "FileReassembler.h"
|
||||
#include "Conn.h"
|
||||
#include "Val.h"
|
||||
#include "Tag.h"
|
||||
|
@ -16,6 +17,8 @@
|
|||
|
||||
namespace file_analysis {
|
||||
|
||||
class FileReassembler;
|
||||
|
||||
/**
|
||||
* Wrapper class around \c fa_file record values from script layer.
|
||||
*/
|
||||
|
@ -166,6 +169,7 @@ public:
|
|||
|
||||
protected:
|
||||
friend class Manager;
|
||||
friend class FileReassembler;
|
||||
|
||||
/**
|
||||
* Constructor; only file_analysis::Manager should be creating these.
|
||||
|
@ -227,12 +231,37 @@ protected:
|
|||
/**
|
||||
* Does mime type detection via file magic signatures and assigns
|
||||
* strongest matching mime type (if available) to \c mime_type
|
||||
* field in #val.
|
||||
* @param data pointer to a chunk of file data.
|
||||
* @param len number of bytes in the data chunk.
|
||||
* field in #val. It uses the data in the BOF buffer
|
||||
* @return whether a mime type match was found.
|
||||
*/
|
||||
bool DetectMIME(const u_char* data, uint64 len);
|
||||
bool DetectMIME();
|
||||
|
||||
/**
|
||||
* Enables reassembly on the file.
|
||||
*/
|
||||
void EnableReassembly();
|
||||
|
||||
/**
|
||||
* Disables reassembly on the file. If there is an existing reassembler
|
||||
* for the file, this will cause it to be deleted and won't allow a new
|
||||
* one to be created until reassembly is reenabled.
|
||||
*/
|
||||
void DisableReassembly();
|
||||
|
||||
/**
|
||||
* Set a maximum allowed bytes of memory for file reassembly for this file.
|
||||
*/
|
||||
void SetReassemblyBuffer(uint64 max);
|
||||
|
||||
/**
|
||||
* Perform stream-wise delivery for analyzers that need it.
|
||||
*/
|
||||
void DeliverStream(const u_char* data, uint64 len);
|
||||
|
||||
/**
|
||||
* Perform chunk-wise delivery for analyzers that need it.
|
||||
*/
|
||||
void DeliverChunk(const u_char* data, uint64 len, uint64 offset);
|
||||
|
||||
/**
|
||||
* Lookup a record field index/offset by name.
|
||||
|
@ -246,25 +275,24 @@ protected:
|
|||
*/
|
||||
static void StaticInit();
|
||||
|
||||
private:
|
||||
protected:
|
||||
string id; /**< A pretty hash that likely identifies file */
|
||||
RecordVal* val; /**< \c fa_file from script layer. */
|
||||
FileReassembler *file_reassembler; /**< A reassembler for the file if it's needed. */
|
||||
uint64 stream_offset; /**< The offset of the file which has been forwarded. */
|
||||
uint64 reassembly_max_buffer; /**< Maximum allowed buffer for reassembly. */
|
||||
bool did_mime_type; /**< Whether the mime type ident has already been attempted. */
|
||||
bool reassembly_enabled; /**< Whether file stream reassembly is needed. */
|
||||
bool postpone_timeout; /**< Whether postponing timeout is requested. */
|
||||
bool first_chunk; /**< Track first non-linear chunk. */
|
||||
bool missed_bof; /**< Flags that we missed start of file. */
|
||||
bool need_reassembly; /**< Whether file stream reassembly is needed. */
|
||||
bool done; /**< If this object is about to be deleted. */
|
||||
bool did_file_new_event; /**< Whether the file_new event has been done. */
|
||||
AnalyzerSet analyzers; /**< A set of attached file analyzer. */
|
||||
queue<pair<EventHandlerPtr, val_list*> > fonc_queue;
|
||||
AnalyzerSet analyzers; /**< A set of attached file analyzers. */
|
||||
|
||||
struct BOF_Buffer {
|
||||
BOF_Buffer() : full(false), replayed(false), size(0) {}
|
||||
BOF_Buffer() : full(false), size(0) {}
|
||||
~BOF_Buffer()
|
||||
{ for ( size_t i = 0; i < chunks.size(); ++i ) delete chunks[i]; }
|
||||
|
||||
bool full;
|
||||
bool replayed;
|
||||
uint64 size;
|
||||
BroString::CVec chunks;
|
||||
} bof_buffer; /**< Beginning of file buffer. */
|
||||
|
|
65
src/file_analysis/FileReassembler.cc
Normal file
65
src/file_analysis/FileReassembler.cc
Normal file
|
@ -0,0 +1,65 @@
|
|||
|
||||
#include "FileReassembler.h"
|
||||
#include "File.h"
|
||||
|
||||
|
||||
namespace file_analysis {
|
||||
|
||||
class File;
|
||||
|
||||
/**
 * Creates a reassembler bound to a file.
 * @param f the file that reassembled data is delivered to.
 * @param starting_offset passed through to the Reassembler base class.
 */
FileReassembler::FileReassembler(File *f, uint64 starting_offset)
	: Reassembler(starting_offset),
	  // Give the status flags a defined value instead of leaving them
	  // uninitialized; order follows the member declaration order.
	  had_gap(0), did_EOF(0), skip_deliveries(0), the_file(f)
	{
	}
|
||||
|
||||
/**
 * Destructor. Intentionally empty: the_file is not deleted here.
 */
FileReassembler::~FileReassembler()
	{
	}
|
||||
|
||||
/**
 * Called when a block has been inserted. Delivers every block that now
 * continues the stream at last_reassem_seq to the file, then trims the
 * data that has been forwarded.
 * @param start_block the first block to consider for delivery.
 */
void FileReassembler::BlockInserted(DataBlock* start_block)
	{
	// Nothing to do if the block starts beyond the point reached so far,
	// or ends at or before it.
	if ( start_block->seq > last_reassem_seq ||
	     start_block->upper <= last_reassem_seq )
		return;

	for ( DataBlock* b = start_block;
	      b && b->seq <= last_reassem_seq; b = b->next )
		{
		if ( b->seq == last_reassem_seq )
			{ // New stuff.
			uint64 len = b->Size();

			// Advance before delivering, as the original ordering did.
			last_reassem_seq += len;
			the_file->DeliverStream(b->block, len);
			}
		}

	// Throw out forwarded data
	TrimToSeq(last_reassem_seq);
	}
|
||||
|
||||
/**
 * Hook for undelivered data up to \a up_to_seq. Currently a no-op.
 */
void FileReassembler::Undelivered(uint64 up_to_seq)
	{
	// Not doing anything here yet.
	}
|
||||
|
||||
/**
 * Hook for overlapping data (\a n bytes at \a b1 and \a b2).
 * Currently a no-op.
 */
void FileReassembler::Overlap(const u_char* b1, const u_char* b2, uint64 n)
	{
	// Not doing anything here yet.
	}
|
||||
|
||||
IMPLEMENT_SERIAL(FileReassembler, SER_FILE_REASSEMBLER);
|
||||
|
||||
/**
 * Serialization is not supported for file reassemblers; calling this
 * reports an internal error.
 */
bool FileReassembler::DoSerialize(SerialInfo* info) const
	{
	reporter->InternalError("FileReassembler::DoSerialize not implemented");

	// Cannot be reached.
	return false;
	}
|
||||
|
||||
/**
 * Unserialization is not supported for file reassemblers; calling this
 * reports an internal error.
 */
bool FileReassembler::DoUnserialize(UnserialInfo* info)
	{
	reporter->InternalError("FileReassembler::DoUnserialize not implemented");

	// Cannot be reached.
	return false;
	}
|
||||
|
||||
} // end file_analysis
|
47
src/file_analysis/FileReassembler.h
Normal file
47
src/file_analysis/FileReassembler.h
Normal file
|
@ -0,0 +1,47 @@
|
|||
#ifndef FILE_ANALYSIS_FILEREASSEMBLER_H
|
||||
#define FILE_ANALYSIS_FILEREASSEMBLER_H
|
||||
|
||||
#include "Reassem.h"
|
||||
#include "File.h"
|
||||
|
||||
class BroFile;
|
||||
class Connection;
|
||||
|
||||
namespace file_analysis {
|
||||
|
||||
class File;
|
||||
|
||||
//const int STOP_ON_GAP = 1;
|
||||
//const int PUNT_ON_PARTIAL = 1;
|
||||
|
||||
class FileReassembler : public Reassembler {
|
||||
public:
|
||||
|
||||
FileReassembler(File* f, uint64 starting_offset);
|
||||
virtual ~FileReassembler();
|
||||
|
||||
void Done();
|
||||
uint64 GetFirstBlockOffset() { return blocks->seq; }
|
||||
|
||||
// Checks if we have delivered all contents that we can possibly
|
||||
// deliver for this endpoint.
|
||||
void CheckEOF();
|
||||
|
||||
protected:
|
||||
FileReassembler() { }
|
||||
|
||||
DECLARE_SERIAL(FileReassembler);
|
||||
|
||||
void Undelivered(uint64 up_to_seq);
|
||||
void BlockInserted(DataBlock* b);
|
||||
void Overlap(const u_char* b1, const u_char* b2, uint64 n);
|
||||
|
||||
unsigned int had_gap:1;
|
||||
unsigned int did_EOF:1;
|
||||
unsigned int skip_deliveries:1;
|
||||
File* the_file;
|
||||
};
|
||||
|
||||
} // namespace analyzer::*
|
||||
|
||||
#endif
|
|
@ -232,6 +232,39 @@ bool Manager::SetTimeoutInterval(const string& file_id, double interval) const
|
|||
return true;
|
||||
}
|
||||
|
||||
bool Manager::EnableReassembly(const string& file_id)
|
||||
{
|
||||
File* file = LookupFile(file_id);
|
||||
|
||||
if ( ! file )
|
||||
return false;
|
||||
|
||||
file->EnableReassembly();
|
||||
return true;
|
||||
}
|
||||
|
||||
bool Manager::DisableReassembly(const string& file_id)
|
||||
{
|
||||
File* file = LookupFile(file_id);
|
||||
|
||||
if ( ! file )
|
||||
return false;
|
||||
|
||||
file->DisableReassembly();
|
||||
return true;
|
||||
}
|
||||
|
||||
bool Manager::SetReassemblyBuffer(const string& file_id, uint64 max)
|
||||
{
|
||||
File* file = LookupFile(file_id);
|
||||
|
||||
if ( ! file )
|
||||
return false;
|
||||
|
||||
file->SetReassemblyBuffer(max);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool Manager::SetExtractionLimit(const string& file_id, RecordVal* args,
|
||||
uint64 n) const
|
||||
{
|
||||
|
@ -254,28 +287,6 @@ bool Manager::AddAnalyzer(const string& file_id, file_analysis::Tag tag,
|
|||
return file->AddAnalyzer(tag, args);
|
||||
}
|
||||
|
||||
TableVal* Manager::AddAnalyzersForMIMEType(const string& file_id, const string& mtype,
|
||||
RecordVal* args)
|
||||
{
|
||||
if ( ! tag_set_type )
|
||||
tag_set_type = internal_type("files_tag_set")->AsTableType();
|
||||
|
||||
TableVal* sval = new TableVal(tag_set_type);
|
||||
TagSet* l = LookupMIMEType(mtype, false);
|
||||
|
||||
if ( ! l )
|
||||
return sval;
|
||||
|
||||
for ( TagSet::const_iterator i = l->begin(); i != l->end(); i++ )
|
||||
{
|
||||
file_analysis::Tag tag = *i;
|
||||
if ( AddAnalyzer(file_id, tag, args) )
|
||||
sval->Assign(tag.AsEnumVal(), 0);
|
||||
}
|
||||
|
||||
return sval;
|
||||
}
|
||||
|
||||
bool Manager::RemoveAnalyzer(const string& file_id, file_analysis::Tag tag,
|
||||
RecordVal* args) const
|
||||
{
|
||||
|
@ -304,6 +315,12 @@ File* Manager::GetFile(const string& file_id, Connection* conn,
|
|||
id_map.Insert(file_id.c_str(), rval);
|
||||
rval->ScheduleInactivityTimer();
|
||||
|
||||
// Generate file_new here so the manager knows about the file.
|
||||
rval->FileEvent(file_new);
|
||||
// Same for file_over_new_connection which is generated by
|
||||
// updating the connection fields.
|
||||
rval->UpdateConnectionFields(conn, is_orig);
|
||||
|
||||
if ( IsIgnored(file_id) )
|
||||
return 0;
|
||||
}
|
||||
|
@ -461,63 +478,6 @@ Analyzer* Manager::InstantiateAnalyzer(Tag tag, RecordVal* args, File* f) const
|
|||
return a;
|
||||
}
|
||||
|
||||
Manager::TagSet* Manager::LookupMIMEType(const string& mtype, bool add_if_not_found)
|
||||
{
|
||||
MIMEMap::const_iterator i = mime_types.find(to_upper(mtype));
|
||||
|
||||
if ( i != mime_types.end() )
|
||||
return i->second;
|
||||
|
||||
if ( ! add_if_not_found )
|
||||
return 0;
|
||||
|
||||
TagSet* l = new TagSet;
|
||||
mime_types.insert(std::make_pair(to_upper(mtype), l));
|
||||
return l;
|
||||
}
|
||||
|
||||
bool Manager::RegisterAnalyzerForMIMEType(EnumVal* tag, StringVal* mtype)
|
||||
{
|
||||
Component* p = Lookup(tag);
|
||||
|
||||
if ( ! p )
|
||||
return false;
|
||||
|
||||
return RegisterAnalyzerForMIMEType(p->Tag(), mtype->CheckString());
|
||||
}
|
||||
|
||||
bool Manager::RegisterAnalyzerForMIMEType(Tag tag, const string& mtype)
|
||||
{
|
||||
TagSet* l = LookupMIMEType(mtype, true);
|
||||
|
||||
DBG_LOG(DBG_FILE_ANALYSIS, "Register analyzer %s for MIME type %s",
|
||||
GetComponentName(tag).c_str(), mtype.c_str());
|
||||
|
||||
l->insert(tag);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool Manager::UnregisterAnalyzerForMIMEType(EnumVal* tag, StringVal* mtype)
|
||||
{
|
||||
Component* p = Lookup(tag);
|
||||
|
||||
if ( ! p )
|
||||
return false;
|
||||
|
||||
return UnregisterAnalyzerForMIMEType(p->Tag(), mtype->CheckString());
|
||||
}
|
||||
|
||||
bool Manager::UnregisterAnalyzerForMIMEType(Tag tag, const string& mtype)
|
||||
{
|
||||
TagSet* l = LookupMIMEType(mtype, true);
|
||||
|
||||
DBG_LOG(DBG_FILE_ANALYSIS, "Unregister analyzer %s for MIME type %s",
|
||||
GetComponentName(tag).c_str(), mtype.c_str());
|
||||
|
||||
l->erase(tag);
|
||||
return true;
|
||||
}
|
||||
|
||||
RuleMatcher::MIME_Matches* Manager::DetectMIME(const u_char* data, uint64 len,
|
||||
RuleMatcher::MIME_Matches* rval) const
|
||||
{
|
||||
|
|
|
@ -213,6 +213,21 @@ public:
|
|||
*/
|
||||
bool SetTimeoutInterval(const string& file_id, double interval) const;
|
||||
|
||||
/**
|
||||
* Enable the reassembler for a file.
|
||||
*/
|
||||
bool EnableReassembly(const string& file_id);
|
||||
|
||||
/**
|
||||
* Disable the reassembler for a file.
|
||||
*/
|
||||
bool DisableReassembly(const string& file_id);
|
||||
|
||||
/**
|
||||
* Set the maximum reassembly buffer size for a file, in bytes.
|
||||
*/
|
||||
bool SetReassemblyBuffer(const string& file_id, uint64 max);
|
||||
|
||||
/**
|
||||
* Sets a limit on the maximum size allowed for extracting the file
|
||||
* to local disk;
|
||||
|
@ -238,18 +253,6 @@ public:
|
|||
bool AddAnalyzer(const string& file_id, file_analysis::Tag tag,
|
||||
RecordVal* args) const;
|
||||
|
||||
/**
|
||||
* Queue attachment of an all analyzers associated with a given MIME
|
||||
* type to the file identifier.
|
||||
*
|
||||
* @param file_id the file identifier/hash.
|
||||
* @param mtype the MIME type; comparisions will be performanced case-insensitive.
|
||||
* @param args a \c AnalyzerArgs value which describes a file analyzer.
|
||||
* @return A ref'ed \c set[Tag] with all added analyzers.
|
||||
*/
|
||||
TableVal* AddAnalyzersForMIMEType(const string& file_id, const string& mtype,
|
||||
RecordVal* args);
|
||||
|
||||
/**
|
||||
* Queue removal of an analyzer for a given file identifier.
|
||||
* @param file_id the file identifier/hash.
|
||||
|
@ -277,62 +280,6 @@ public:
|
|||
Analyzer* InstantiateAnalyzer(Tag tag, RecordVal* args, File* f) const;
|
||||
|
||||
/**
|
||||
* Registers a MIME type for an analyzer. Once registered, files of
|
||||
* that MIME type will automatically get a corresponding analyzer
|
||||
* assigned.
|
||||
*
|
||||
* @param tag The analyzer's tag as an enum of script type \c
|
||||
* Files::Tag.
|
||||
*
|
||||
* @param mtype The MIME type. It will be matched case-insenistive.
|
||||
*
|
||||
* @return True if successful.
|
||||
*/
|
||||
bool RegisterAnalyzerForMIMEType(EnumVal* tag, StringVal* mtype);
|
||||
|
||||
/**
|
||||
* Registers a MIME type for an analyzer. Once registered, files of
|
||||
* that MIME type will automatically get a corresponding analyzer
|
||||
* assigned.
|
||||
*
|
||||
* @param tag The analyzer's tag as an enum of script type \c
|
||||
* Files::Tag.
|
||||
*
|
||||
* @param mtype The MIME type. It will be matched case-insenistive.
|
||||
*
|
||||
* @return True if successful.
|
||||
*/
|
||||
bool RegisterAnalyzerForMIMEType(Tag tag, const string& mtype);
|
||||
|
||||
/**
|
||||
* Unregisters a MIME type for an analyzer.
|
||||
*
|
||||
* @param tag The analyzer's tag as an enum of script type \c
|
||||
* Files::Tag.
|
||||
*
|
||||
* @param mtype The MIME type. It will be matched case-insenistive.
|
||||
*
|
||||
* @return True if successful (incl. when the type wasn't actually
|
||||
* registered for the analyzer).
|
||||
*
|
||||
*/
|
||||
bool UnregisterAnalyzerForMIMEType(EnumVal* tag, StringVal* mtype);
|
||||
|
||||
/**
|
||||
* Unregisters a MIME type for an analyzer.
|
||||
*
|
||||
* @param tag The analyzer's tag as an enum of script type \c
|
||||
* Files::Tag.
|
||||
*
|
||||
* @param mtype The MIME type. It will be matched case-insenistive.
|
||||
*
|
||||
* @return True if successful (incl. when the type wasn't actually
|
||||
* registered for the analyzer).
|
||||
*
|
||||
*/
|
||||
bool UnregisterAnalyzerForMIMEType(Tag tag, const string& mtype);
|
||||
|
||||
/**
|
||||
* Returns a set of all matching MIME magic signatures for a given
|
||||
* chunk of data.
|
||||
* @param data A chunk of bytes to match magic MIME signatures against.
|
||||
|
|
|
@ -12,9 +12,9 @@ using namespace file_analysis;
|
|||
Extract::Extract(RecordVal* args, File* file, const string& arg_filename,
|
||||
uint64 arg_limit)
|
||||
: file_analysis::Analyzer(file_mgr->GetComponentTag("EXTRACT"), args, file),
|
||||
filename(arg_filename), limit(arg_limit)
|
||||
filename(arg_filename), limit(arg_limit), depth(0)
|
||||
{
|
||||
fd = open(filename.c_str(), O_WRONLY | O_CREAT | O_TRUNC, 0666);
|
||||
fd = open(filename.c_str(), O_WRONLY | O_CREAT | O_TRUNC | O_APPEND, 0666);
|
||||
|
||||
if ( fd < 0 )
|
||||
{
|
||||
|
@ -53,7 +53,7 @@ file_analysis::Analyzer* Extract::Instantiate(RecordVal* args, File* file)
|
|||
limit->AsCount());
|
||||
}
|
||||
|
||||
static bool check_limit_exceeded(uint64 lim, uint64 off, uint64 len, uint64* n)
|
||||
static bool check_limit_exceeded(uint64 lim, uint64 depth, uint64 len, uint64* n)
|
||||
{
|
||||
if ( lim == 0 )
|
||||
{
|
||||
|
@ -61,29 +61,32 @@ static bool check_limit_exceeded(uint64 lim, uint64 off, uint64 len, uint64* n)
|
|||
return false;
|
||||
}
|
||||
|
||||
if ( off >= lim )
|
||||
if ( depth >= lim )
|
||||
{
|
||||
*n = 0;
|
||||
return true;
|
||||
}
|
||||
|
||||
*n = lim - off;
|
||||
|
||||
if ( len > *n )
|
||||
else if ( depth + len > lim )
|
||||
{
|
||||
printf("exceeded the maximum extraction lenght depth: %llu len: %llu lim: %llu\n", depth, len, lim);
|
||||
*n = lim - depth;
|
||||
return true;
|
||||
}
|
||||
else
|
||||
{
|
||||
*n = len;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
bool Extract::DeliverChunk(const u_char* data, uint64 len, uint64 offset)
|
||||
bool Extract::DeliverStream(const u_char* data, uint64 len)
|
||||
{
|
||||
if ( ! fd )
|
||||
return false;
|
||||
|
||||
uint64 towrite = 0;
|
||||
bool limit_exceeded = check_limit_exceeded(limit, offset, len, &towrite);
|
||||
bool limit_exceeded = check_limit_exceeded(limit, depth, len, &towrite);
|
||||
|
||||
if ( limit_exceeded && file_extraction_limit )
|
||||
{
|
||||
|
@ -92,16 +95,24 @@ bool Extract::DeliverChunk(const u_char* data, uint64 len, uint64 offset)
|
|||
vl->append(f->GetVal()->Ref());
|
||||
vl->append(Args()->Ref());
|
||||
vl->append(new Val(limit, TYPE_COUNT));
|
||||
vl->append(new Val(offset, TYPE_COUNT));
|
||||
vl->append(new Val(len, TYPE_COUNT));
|
||||
f->FileEvent(file_extraction_limit, vl);
|
||||
|
||||
// Limit may have been modified by BIF, re-check it.
|
||||
limit_exceeded = check_limit_exceeded(limit, offset, len, &towrite);
|
||||
// Limit may have been modified by a BIF, re-check it.
|
||||
limit_exceeded = check_limit_exceeded(limit, depth, len, &towrite);
|
||||
}
|
||||
|
||||
if ( towrite > 0 )
|
||||
safe_pwrite(fd, data, towrite, offset);
|
||||
{
|
||||
safe_pwrite(fd, (const u_char *) data, towrite, depth);
|
||||
depth += towrite;
|
||||
}
|
||||
|
||||
return ( ! limit_exceeded );
|
||||
}
|
||||
|
||||
bool Extract::Undelivered(uint64 offset, uint64 len)
|
||||
{
|
||||
depth += len;
|
||||
return true;
|
||||
}
|
||||
|
|
|
@ -28,11 +28,18 @@ public:
|
|||
* Write a chunk of file data to the local extraction file.
|
||||
* @param data pointer to a chunk of file data.
|
||||
* @param len number of bytes in the data chunk.
|
||||
* @param offset number of bytes from start of file at which chunk starts.
|
||||
* @return false if there was no extraction file open and the data couldn't
|
||||
* be written, else true.
|
||||
*/
|
||||
virtual bool DeliverChunk(const u_char* data, uint64 len, uint64 offset);
|
||||
virtual bool DeliverStream(const u_char* data, uint64 len);
|
||||
|
||||
/**
|
||||
* Report undelivered bytes.
|
||||
* @param offset distance into the file where the gap occurred.
|
||||
* @param len number of bytes undelivered.
|
||||
* @return true
|
||||
*/
|
||||
virtual bool Undelivered(uint64 offset, uint64 len);
|
||||
|
||||
/**
|
||||
* Create a new instance of an Extract analyzer.
|
||||
|
@ -67,6 +74,7 @@ private:
|
|||
string filename;
|
||||
int fd;
|
||||
uint64 limit;
|
||||
uint64 depth;
|
||||
};
|
||||
|
||||
} // namespace file_analysis
|
||||
|
|
|
@ -11,9 +11,7 @@
|
|||
##
|
||||
## limit: The limit, in bytes, the extracted file is about to breach.
|
||||
##
|
||||
## offset: The offset at which a file chunk is about to be written.
|
||||
##
|
||||
## len: The length of the file chunk about to be written.
|
||||
##
|
||||
## .. bro:see:: Files::add_analyzer Files::ANALYZER_EXTRACT
|
||||
event file_extraction_limit%(f: fa_file, args: any, limit: count, offset: count, len: count%);
|
||||
event file_extraction_limit%(f: fa_file, args: any, limit: count, len: count%);
|
||||
|
|
|
@ -15,6 +15,27 @@ function Files::__set_timeout_interval%(file_id: string, t: interval%): bool
|
|||
return new Val(result, TYPE_BOOL);
|
||||
%}
|
||||
|
||||
## :bro:see:`Files::enable_reassembly`.
|
||||
function Files::__enable_reassembly%(file_id: string%): bool
|
||||
%{
|
||||
bool result = file_mgr->EnableReassembly(file_id->CheckString());
|
||||
return new Val(result, TYPE_BOOL);
|
||||
%}
|
||||
|
||||
## :bro:see:`Files::disable_reassembly`.
|
||||
function Files::__disable_reassembly%(file_id: string%): bool
|
||||
%{
|
||||
bool result = file_mgr->DisableReassembly(file_id->CheckString());
|
||||
return new Val(result, TYPE_BOOL);
|
||||
%}
|
||||
|
||||
## :bro:see:`Files::set_reassembly_buffer`.
|
||||
function Files::__set_reassembly_buffer%(file_id: string, max: count%): bool
|
||||
%{
|
||||
bool result = file_mgr->SetReassemblyBuffer(file_id->CheckString(), max);
|
||||
return new Val(result, TYPE_BOOL);
|
||||
%}
|
||||
|
||||
## :bro:see:`Files::add_analyzer`.
|
||||
function Files::__add_analyzer%(file_id: string, tag: Files::Tag, args: any%): bool
|
||||
%{
|
||||
|
@ -26,16 +47,6 @@ function Files::__add_analyzer%(file_id: string, tag: Files::Tag, args: any%): b
|
|||
return new Val(result, TYPE_BOOL);
|
||||
%}
|
||||
|
||||
## :bro:see:`Files::add_analyzers_for_mime_type`.
|
||||
function Files::__add_analyzers_for_mime_type%(file_id: string, mtype: string, args: any%): files_tag_set
|
||||
%{
|
||||
using BifType::Record::Files::AnalyzerArgs;
|
||||
RecordVal* rv = args->AsRecordVal()->CoerceTo(AnalyzerArgs);
|
||||
Val* analyzers = file_mgr->AddAnalyzersForMIMEType(file_id->CheckString(), mtype->CheckString(), rv);
|
||||
Unref(rv);
|
||||
return analyzers;
|
||||
%}
|
||||
|
||||
## :bro:see:`Files::remove_analyzer`.
|
||||
function Files::__remove_analyzer%(file_id: string, tag: Files::Tag, args: any%): bool
|
||||
%{
|
||||
|
@ -60,13 +71,6 @@ function Files::__analyzer_name%(tag: Files::Tag%) : string
|
|||
return new StringVal(file_mgr->GetComponentName(tag));
|
||||
%}
|
||||
|
||||
## :bro:see:`Files::register_for_mime_type`.
|
||||
function Files::__register_for_mime_type%(id: Analyzer::Tag, mt: string%) : bool
|
||||
%{
|
||||
bool result = file_mgr->RegisterAnalyzerForMIMEType(id->AsEnumVal(), mt);
|
||||
return new Val(result, TYPE_BOOL);
|
||||
%}
|
||||
|
||||
module GLOBAL;
|
||||
|
||||
## For use within a :bro:see:`get_file_handle` handler to set a unique
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue