Mirror of https://github.com/zeek/zeek.git
Review/fix/change file reassembly functionality.
- Re-arrange how some fa_file fields (e.g. source, connection info, mime type) get updated/set, for consistency.
- Add more robust mechanisms for flushing the reassembly buffer. The goal is to report all gaps and deliveries to file analyzers regardless of the state of the reassembly buffer at the time it has to be flushed.
Parent: edaf7edc11
Commit: cbbe7b52dc

26 changed files with 370 additions and 238 deletions
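The heart of the change is the new flush path in FileReassembler (see the FileReassembler hunks below): whenever the reassembly buffer has to be emptied — on overflow, on a gap, or at end of file — every buffered block below the flush point is either delivered via File::DeliverStream() or reported via File::Gap(), so analyzers always see a complete, ordered picture of the file. The following is a minimal, self-contained sketch of that idea only; the class, names, and printed output are invented for illustration and are not the actual Zeek types.

// Simplified sketch of the flush-and-report behaviour added in this commit.
// All names here are hypothetical; the real code lives in FileReassembler.cc.
#include <cstdint>
#include <iostream>
#include <map>
#include <string>

class ToyReassembler {
public:
    void Insert(uint64_t offset, std::string data)
        { blocks[offset] = std::move(data); }

    // Deliver or report everything below `seq`, exactly once.
    uint64_t FlushTo(uint64_t seq)
        {
        if ( flushing )
            return 0; // guard against re-entrant flushes (cf. IsCurrentlyFlushing)

        flushing = true;
        uint64_t gap_bytes = 0;

        while ( ! blocks.empty() && blocks.begin()->first < seq )
            {
            auto it = blocks.begin();

            if ( it->first > next_offset )
                {
                // Hole in front of the block: report it as a gap.
                uint64_t len = it->first - next_offset;
                std::cout << "gap at " << next_offset << " len " << len << "\n";
                gap_bytes += len;
                next_offset = it->first;
                }

            std::cout << "deliver at " << it->first << " len " << it->second.size() << "\n";
            next_offset += it->second.size();
            blocks.erase(it);
            }

        if ( seq > next_offset )
            {
            // Trailing hole up to the flush point.
            std::cout << "gap at " << next_offset << " len " << (seq - next_offset) << "\n";
            gap_bytes += seq - next_offset;
            next_offset = seq;
            }

        flushing = false;
        return gap_bytes; // bytes newly known to be missing
        }

private:
    std::map<uint64_t, std::string> blocks; // offset -> chunk
    uint64_t next_offset = 0;               // everything below this was handled
    bool flushing = false;
};

int main()
    {
    ToyReassembler r;
    r.Insert(0, "abc");
    r.Insert(10, "xyz");  // bytes 3..9 never arrive
    r.FlushTo(20);        // delivers both chunks, reports gaps 3-9 and 13-19
    }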
@@ -111,6 +111,18 @@ public:
     */
    void SetAnalyzerTag(const file_analysis::Tag& tag);

+   /**
+    * @return true if the analyzer has ever seen a stream-wise delivery.
+    */
+   bool GotStreamDelivery() const
+       { return got_stream_delivery; }
+
+   /**
+    * Flag the analyzer as having seen a stream-wise delivery.
+    */
+   void SetGotStreamDelivery()
+       { got_stream_delivery = true; }
+
protected:

    /**

@@ -123,7 +135,8 @@ protected:
    Analyzer(file_analysis::Tag arg_tag, RecordVal* arg_args, File* arg_file)
        : tag(arg_tag),
          args(arg_args->Ref()->AsRecordVal()),
-         file(arg_file)
+         file(arg_file),
+         got_stream_delivery(false)
        {
        id = ++id_counter;
        }

@@ -140,7 +153,8 @@ protected:
    Analyzer(RecordVal* arg_args, File* arg_file)
        : tag(),
          args(arg_args->Ref()->AsRecordVal()),
-         file(arg_file)
+         file(arg_file),
+         got_stream_delivery(false)
        {
        id = ++id_counter;
        }

@@ -151,6 +165,7 @@ private:
    file_analysis::Tag tag;   /**< The particular type of the analyzer instance. */
    RecordVal* args;          /**< \c AnalyzerArgs val gives tunable analyzer params. */
    File* file;               /**< The file to which the analyzer is attached. */
+   bool got_stream_delivery;

    static ID id_counter;
};

@@ -72,20 +72,20 @@ bool AnalyzerSet::Add(file_analysis::Tag tag, RecordVal* args)
    return true;
    }

-bool AnalyzerSet::QueueAdd(file_analysis::Tag tag, RecordVal* args, file_analysis::Analyzer* a)
+Analyzer* AnalyzerSet::QueueAdd(file_analysis::Tag tag, RecordVal* args)
    {
    HashKey* key = GetKey(tag, args);
-   a = InstantiateAnalyzer(tag, args);
+   file_analysis::Analyzer* a = InstantiateAnalyzer(tag, args);

    if ( ! a )
        {
        delete key;
-       return false;
+       return 0;
        }

    mod_queue.push(new AddMod(a, key));

-   return true;
+   return a;
    }

bool AnalyzerSet::AddMod::Perform(AnalyzerSet* set)

@@ -57,10 +57,10 @@ public:
    * Queue the attachment of an analyzer to #file.
    * @param tag the analyzer tag of the file analyzer to add.
    * @param args an \c AnalyzerArgs value which specifies an analyzer.
-   * @param a an analyzer pointer to return the instantiated analyzer to the caller.
-   * @return true if analyzer was able to be instantiated, else false.
+   * @return if successful, a pointer to a newly instantiated analyzer else
+   *         a null pointer. The caller does *not* take ownership of the memory.
    */
-   bool QueueAdd(file_analysis::Tag tag, RecordVal* args, file_analysis::Analyzer* a);
+   file_analysis::Analyzer* QueueAdd(file_analysis::Tag tag, RecordVal* args);

    /**
    * Remove an analyzer from #file immediately.
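For reference, the reworked QueueAdd() contract documented above (return a borrowed pointer, or null on failure, instead of a bool plus out-parameter) reduces to the usual C++ ownership pattern sketched below. The toy types and names are invented; only the ownership rule mirrors the documentation.

// Hypothetical illustration: the set keeps ownership, callers only borrow.
#include <iostream>
#include <memory>
#include <vector>

struct Analyzer { int id = 0; };

class ToyAnalyzerSet {
public:
    // Returns a borrowed pointer on success, nullptr on failure.
    Analyzer* QueueAdd(int id)
        {
        if ( id < 0 )
            return nullptr;               // instantiation failed, nothing queued

        auto a = std::make_unique<Analyzer>();
        a->id = id;
        queued.push_back(std::move(a));
        return queued.back().get();       // set keeps ownership of the analyzer
        }

private:
    std::vector<std::unique_ptr<Analyzer>> queued;
};

int main()
    {
    ToyAnalyzerSet set;

    if ( Analyzer* a = set.QueueAdd(1) )
        std::cout << "queued analyzer " << a->id << "\n";  // borrow only, never delete

    if ( ! set.QueueAdd(-1) )
        std::cout << "instantiation failed\n";
    }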
@@ -74,8 +74,8 @@ void File::StaticInit()
    bof_buffer_idx = Idx("bof_buffer");
    }

-File::File(const string& file_id, Connection* conn, analyzer::Tag tag,
-           bool is_orig)
+File::File(const string& file_id, const string& source_name, Connection* conn,
+           analyzer::Tag tag, bool is_orig)
    : id(file_id), val(0), file_reassembler(0), stream_offset(0),
      reassembly_max_buffer(0), did_mime_type(false),
      reassembly_enabled(false), postpone_timeout(false), done(false),

@@ -87,12 +87,12 @@ File::File(const string& file_id, Connection* conn, analyzer::Tag tag,
    val = new RecordVal(fa_file_type);
    val->Assign(id_idx, new StringVal(file_id.c_str()));
+   SetSource(source_name);

    if ( conn )
        {
        // add source, connection, is_orig fields
-       SetSource(analyzer_mgr->GetComponentName(tag));
        val->Assign(is_orig_idx, new Val(is_orig, TYPE_BOOL));
        UpdateConnectionFields(conn, is_orig);
        }

    UpdateLastActivityTime();

@@ -102,11 +102,7 @@ File::~File()
    {
    DBG_LOG(DBG_FILE_ANALYSIS, "[%s] Destroying File object", id.c_str());
    Unref(val);
-
-   if ( file_reassembler )
-       {
-       delete file_reassembler;
-       }
+   delete file_reassembler;
    }

void File::UpdateLastActivityTime()

@@ -119,10 +115,10 @@ double File::GetLastActivityTime() const
    return val->Lookup(last_active_idx)->AsTime();
    }

-void File::UpdateConnectionFields(Connection* conn, bool is_orig)
+bool File::UpdateConnectionFields(Connection* conn, bool is_orig)
    {
    if ( ! conn )
-       return;
+       return false;

    Val* conns = val->Lookup(conns_idx);

@@ -133,23 +129,28 @@ void File::UpdateConnectionFields(Connection* conn, bool is_orig)
        }

    Val* idx = get_conn_id_val(conn);
-   if ( ! conns->AsTableVal()->Lookup(idx) )
+
+   if ( conns->AsTableVal()->Lookup(idx) )
        {
-       Val* conn_val = conn->BuildConnVal();
-       conns->AsTableVal()->Assign(idx, conn_val);
-
-       if ( FileEventAvailable(file_over_new_connection) )
-           {
-           val_list* vl = new val_list();
-           vl->append(val->Ref());
-           vl->append(conn_val->Ref());
-           vl->append(new Val(is_orig, TYPE_BOOL));
-
-           FileEvent(file_over_new_connection, vl);
-           }
+       Unref(idx);
+       return false;
        }

-   Unref(idx);
+   conns->AsTableVal()->Assign(idx, conn->BuildConnVal());
+   Unref(idx);
+   return true;
+   }
+
+void File::RaiseFileOverNewConnection(Connection* conn, bool is_orig)
+   {
+   if ( conn && FileEventAvailable(file_over_new_connection) )
+       {
+       val_list* vl = new val_list();
+       vl->append(val->Ref());
+       vl->append(conn->BuildConnVal()->Ref());
+       vl->append(new Val(is_orig, TYPE_BOOL));
+       FileEvent(file_over_new_connection, vl);
+       }
    }

uint64 File::LookupFieldDefaultCount(int idx) const
@@ -252,20 +253,7 @@ bool File::AddAnalyzer(file_analysis::Tag tag, RecordVal* args)
    if ( done )
        return false;

-   file_analysis::Analyzer *a = 0;
-   bool success = analyzers.QueueAdd(tag, args, a);
-   if ( success && a )
-       {
-       // Catch up this analyzer with the BOF buffer
-       for ( size_t i = 0; i < bof_buffer.chunks.size(); ++i )
-           {
-           if ( ! a->DeliverStream(bof_buffer.chunks[i]->Bytes(), bof_buffer.chunks[i]->Len()) )
-               {
-               analyzers.QueueRemove(a->Tag(), a->Args());
-               }
-           }
-       }
-   return success;
+   return analyzers.QueueAdd(tag, args) != 0;
    }

bool File::RemoveAnalyzer(file_analysis::Tag tag, RecordVal* args)

@@ -284,11 +272,8 @@ void File::EnableReassembly()
void File::DisableReassembly()
    {
    reassembly_enabled = false;
-   if ( file_reassembler )
-       {
-       delete file_reassembler;
-       file_reassembler = NULL;
-       }
+   delete file_reassembler;
+   file_reassembler = 0;
    }

void File::SetReassemblyBuffer(uint64 max)

@@ -298,11 +283,23 @@ void File::SetReassemblyBuffer(uint64 max)

bool File::DetectMIME()
    {
-   RuleMatcher::MIME_Matches matches;
+   did_mime_type = true;

-   BroString *bs = concatenate(bof_buffer.chunks);
-   const u_char* data = bs->Bytes();
-   uint64 len = bs->Len();
+   Val* bof_buffer_val = val->Lookup(bof_buffer_idx);
+
+   if ( ! bof_buffer_val )
+       {
+       if ( bof_buffer.size == 0 )
+           return false;
+
+       BroString* bs = concatenate(bof_buffer.chunks);
+       bof_buffer_val = new StringVal(bs);
+       val->Assign(bof_buffer_idx, bof_buffer_val);
+       }
+
+   RuleMatcher::MIME_Matches matches;
+   const u_char* data = bof_buffer_val->AsString()->Bytes();
+   uint64 len = bof_buffer_val->AsString()->Len();
    len = min(len, LookupFieldDefaultCount(bof_buffer_size_idx));
    file_mgr->DetectMIME(data, len, &matches);

@@ -338,45 +335,70 @@ bool File::BufferBOF(const u_char* data, uint64 len)
    bof_buffer.chunks.push_back(new BroString(data, len, 0));
    bof_buffer.size += len;

-   if ( bof_buffer.size >= desired_size )
+   if ( bof_buffer.size < desired_size )
+       return true;
+
+   bof_buffer.full = true;
+
+   if ( bof_buffer.size > 0 )
        {
-       bof_buffer.full = true;
+       BroString* bs = concatenate(bof_buffer.chunks);
+       val->Assign(bof_buffer_idx, new StringVal(bs));
        }

-   return true;
-   }
-
-void File::DeliverStream(const u_char* data, uint64 len)
-   {
-   // Buffer enough data for the BOF buffer
-   BufferBOF(data, len);
-
-   // TODO: mime matching size needs defined.
-   if ( ! did_mime_type &&
-        bof_buffer.size >= 1024 &&
-        LookupFieldDefaultCount(missing_bytes_idx) == 0 )
-       {
-       did_mime_type = true;
-       DetectMIME();
-
-       // TODO: this needs to be done elsewhere. For now it's here.
-       BroString* bs = concatenate(bof_buffer.chunks);
-       val->Assign(bof_buffer_idx, new StringVal(bs));
-       }
-
-   DBG_LOG(DBG_FILE_ANALYSIS, "[%s] %" PRIu64 " bytes in at offset %" PRIu64 "; %s [%s]",
-       id.c_str(), len, stream_offset,
-       IsComplete() ? "complete" : "incomplete",
-       fmt_bytes((const char*) data, min((uint64)40, len)), len > 40 ? "..." : "");
+   return false;
+   }
+
+void File::DeliverStream(const u_char* data, uint64 len)
+   {
+   bool bof_was_full = bof_buffer.full;
+   // Buffer enough data for the BOF buffer
+   BufferBOF(data, len);
+
+   if ( ! did_mime_type && bof_buffer.full &&
+        LookupFieldDefaultCount(missing_bytes_idx) == 0 )
+       DetectMIME();
+
+   DBG_LOG(DBG_FILE_ANALYSIS,
+       "[%s] %" PRIu64 " stream bytes in at offset %" PRIu64 "; %s [%s%s]",
+       id.c_str(), len, stream_offset,
+       IsComplete() ? "complete" : "incomplete",
+       fmt_bytes((const char*) data, min((uint64)40, len)),
+       len > 40 ? "..." : "");

    file_analysis::Analyzer* a = 0;
    IterCookie* c = analyzers.InitForIteration();

    while ( (a = analyzers.NextEntry(c)) )
        {
-       if ( !a->DeliverStream(data, len) )
+       if ( ! a->GotStreamDelivery() )
            {
-           analyzers.QueueRemove(a->Tag(), a->Args());
+           int num_bof_chunks_behind = bof_buffer.chunks.size();
+
+           if ( ! bof_was_full )
+               // We just added a chunk to the BOF buffer, don't count it
+               // as it will get delivered on its own.
+               num_bof_chunks_behind -= 1;
+
+           uint64 bytes_delivered = 0;
+
+           // Catch this analyzer up with the BOF buffer.
+           for ( int i = 0; i < num_bof_chunks_behind; ++i )
+               {
+               if ( ! a->DeliverStream(bof_buffer.chunks[i]->Bytes(),
+                                       bof_buffer.chunks[i]->Len()) )
+                   analyzers.QueueRemove(a->Tag(), a->Args());
+
+               bytes_delivered += bof_buffer.chunks[i]->Len();
+               }
+
+           a->SetGotStreamDelivery();
+           // May need to catch analyzer up on missed gap?
+           // Analyzer should be fully caught up to stream_offset now.
            }
+
+       if ( ! a->DeliverStream(data, len) )
+           analyzers.QueueRemove(a->Tag(), a->Args());
        }

    stream_offset += len;
@@ -389,21 +411,20 @@ void File::DeliverChunk(const u_char* data, uint64 len, uint64 offset)
    if ( file_reassembler )
        {
        if ( reassembly_max_buffer > 0 &&
             reassembly_max_buffer < file_reassembler->TotalSize() )
            {
-           uint64 first_offset = file_reassembler->GetFirstBlockOffset();
-           int gap_bytes = file_reassembler->TrimToSeq(first_offset);
-
+           uint64 current_offset = stream_offset;
+           uint64 gap_bytes = file_reassembler->Flush();
            IncrementByteCount(gap_bytes, overflow_bytes_idx);

            if ( FileEventAvailable(file_reassembly_overflow) )
                {
                val_list* vl = new val_list();
                vl->append(val->Ref());
-               vl->append(new Val(stream_offset, TYPE_COUNT));
+               vl->append(new Val(current_offset, TYPE_COUNT));
                vl->append(new Val(gap_bytes, TYPE_COUNT));
                FileEvent(file_reassembly_overflow, vl);
                }
-
-           Gap(stream_offset, gap_bytes);
            }

        // Forward data to the reassembler.

@@ -428,29 +449,28 @@ void File::DeliverChunk(const u_char* data, uint64 len, uint64 offset)
        IncrementByteCount(len, overflow_bytes_idx);
        }

-   DBG_LOG(DBG_FILE_ANALYSIS, "[%s] %" PRIu64 " bytes in; %s [%s]",
-       id.c_str(), len,
-       IsComplete() ? "complete" : "incomplete",
-       fmt_bytes((const char*) data, min((uint64)40, len)), len > 40 ? "..." : "");
+   DBG_LOG(DBG_FILE_ANALYSIS,
+       "[%s] %" PRIu64 " chunk bytes in at offset %" PRIu64 "; %s [%s%s]",
+       id.c_str(), len, offset,
+       IsComplete() ? "complete" : "incomplete",
+       fmt_bytes((const char*) data, min((uint64)40, len)),
+       len > 40 ? "..." : "");

    file_analysis::Analyzer* a = 0;
    IterCookie* c = analyzers.InitForIteration();

    while ( (a = analyzers.NextEntry(c)) )
        {
-       if ( !a->DeliverChunk(data, len, offset) )
+       if ( ! a->DeliverChunk(data, len, offset) )
            {
            analyzers.QueueRemove(a->Tag(), a->Args());
            }
        }

+   if ( IsComplete() )
+       {
+       // If the file is complete we can automatically go and close out the file from here.
+       EndOfFile();
+       }
    }

void File::DataIn(const u_char* data, uint64 len, uint64 offset)
    {
    analyzers.DrainModifications();
@@ -461,10 +481,7 @@ void File::DataIn(const u_char* data, uint64 len, uint64 offset)
void File::DataIn(const u_char* data, uint64 len)
    {
    analyzers.DrainModifications();
-
-   uint64 offset = LookupFieldDefaultCount(seen_bytes_idx) +
-                   LookupFieldDefaultCount(missing_bytes_idx);
-   DeliverChunk(data, len, offset);
+   DeliverChunk(data, len, stream_offset);
    analyzers.DrainModifications();
    }
@@ -475,20 +492,18 @@ void File::EndOfFile()
    if ( done )
        return;

-   if ( ! did_mime_type )
-       {
-       DetectMIME();
-
-       // TODO: this also needs to be done elsewhere.
-       if ( bof_buffer.size > 0 )
-           {
-           BroString* bs = concatenate(bof_buffer.chunks);
-           val->Assign(bof_buffer_idx, new StringVal(bs));
-           }
-       }
+   if ( ! did_mime_type &&
+        LookupFieldDefaultCount(missing_bytes_idx) == 0 )
+       DetectMIME();

    analyzers.DrainModifications();

+   if ( file_reassembler )
+       {
+       file_reassembler->Flush();
+       analyzers.DrainModifications();
+       }
+
    done = true;

    file_analysis::Analyzer* a = 0;

@@ -507,9 +522,16 @@ void File::EndOfFile()

void File::Gap(uint64 offset, uint64 len)
    {
    DBG_LOG(DBG_FILE_ANALYSIS, "[%s] Gap of size %" PRIu64 " at offset %" PRIu64,
        id.c_str(), len, offset);

+   if ( file_reassembler && ! file_reassembler->IsCurrentlyFlushing() )
+       {
+       file_reassembler->FlushTo(offset + len);
+       // The reassembler will call us back with all the gaps we need to know.
+       return;
+       }
+
    analyzers.DrainModifications();

    file_analysis::Analyzer* a = 0;
@@ -3,7 +3,6 @@
#ifndef FILE_ANALYSIS_FILE_H
#define FILE_ANALYSIS_FILE_H

-#include <queue>
#include <string>
#include <utility>
#include <vector>

@@ -89,10 +88,10 @@ public:
    void SetTotalBytes(uint64 size);

    /**
-    * Compares "seen_bytes" field to "total_bytes" field of #val record to
-    * determine if the full file has been seen.
-    * @return false if "total_bytes" hasn't been set yet or "seen_bytes" is
-    * less than it, else true.
+    * @return true if file analysis is complete for the file, else false.
+    * It is incomplete if the total size is unknown or if the number of bytes
+    * streamed to analyzers (either as data deliveries or gap information)
+    * does not yet match the known total size.
    */
    bool IsComplete() const;

@@ -175,13 +174,14 @@ protected:
    * Constructor; only file_analysis::Manager should be creating these.
    * @param file_id an identifier string for the file in pretty hash form
    * (similar to connection uids).
+   * @param source_name the value for the source field to fill in.
    * @param conn a network connection over which the file is transferred.
    * @param tag the network protocol over which the file is transferred.
    * @param is_orig true if the file is being transferred from the originator
    *        of the connection to the responder. False indicates the other
    *        direction.
    */
-   File(const string& file_id, Connection* conn = 0,
+   File(const string& file_id, const string& source_name, Connection* conn = 0,
        analyzer::Tag tag = analyzer::Tag::Error, bool is_orig = false);

    /**

@@ -189,8 +189,14 @@ protected:
    * \c conn_id and UID taken from \a conn.
    * @param conn the connection over which a part of the file has been seen.
    * @param is_orig true if the connection originator is sending the file.
+   * @return true if the connection was previously unknown.
    */
-   void UpdateConnectionFields(Connection* conn, bool is_orig);
+   bool UpdateConnectionFields(Connection* conn, bool is_orig);

+   /**
+    * Raise the file_over_new_connection event with given arguments.
+    */
+   void RaiseFileOverNewConnection(Connection* conn, bool is_orig);
+
    /**
    * Increment a byte count field of #val record by \a size.
@@ -223,15 +229,10 @@ protected:
    */
    bool BufferBOF(const u_char* data, uint64 len);

-   /**
-    * Forward any beginning-of-file buffered data on to DataIn stream.
-    */
-   void ReplayBOF();
-
    /**
    * Does mime type detection via file magic signatures and assigns
    * strongest matching mime type (if available) to \c mime_type
-    * field in #val. It uses the data in the BOF buffer
+    * field in #val. It uses the data in the BOF buffer.
+    * @return whether a mime type match was found.
    */
    bool DetectMIME();

@@ -278,7 +279,7 @@ protected:
protected:
    string id;                 /**< A pretty hash that likely identifies file */
    RecordVal* val;            /**< \c fa_file from script layer. */
-   FileReassembler *file_reassembler; /**< A reassembler for the file if it's needed. */
+   FileReassembler* file_reassembler; /**< A reassembler for the file if it's needed. */
    uint64 stream_offset;      /**< The offset of the file which has been forwarded. */
    uint64 reassembly_max_buffer; /**< Maximum allowed buffer for reassembly. */
    bool did_mime_type;        /**< Whether the mime type ident has already been attempted. */

@@ -8,7 +8,7 @@ namespace file_analysis {
class File;

FileReassembler::FileReassembler(File *f, uint64 starting_offset)
-   : Reassembler(starting_offset), the_file(f)
+   : Reassembler(starting_offset), the_file(f), flushing(false)
    {
    }

@@ -16,6 +16,35 @@ FileReassembler::~FileReassembler()
    {
    }

+uint64 FileReassembler::Flush()
+   {
+   if ( flushing )
+       return 0;
+
+   if ( last_block )
+       {
+       // This is expected to call back into FileReassembler::Undelivered().
+       flushing = true;
+       uint64 rval = TrimToSeq(last_block->upper);
+       flushing = false;
+       return rval;
+       }
+
+   return 0;
+   }
+
+uint64 FileReassembler::FlushTo(uint64 sequence)
+   {
+   if ( flushing )
+       return 0;
+
+   flushing = true;
+   uint64 rval = TrimToSeq(sequence);
+   flushing = false;
+   last_reassem_seq = sequence;
+   return rval;
+   }
+
void FileReassembler::BlockInserted(DataBlock* start_block)
    {
    if ( start_block->seq > last_reassem_seq ||

@@ -28,7 +57,6 @@ void FileReassembler::BlockInserted(DataBlock* start_block)
    if ( b->seq == last_reassem_seq )
        { // New stuff.
        uint64 len = b->Size();
-       uint64 seq = last_reassem_seq;
        last_reassem_seq += len;
        the_file->DeliverStream(b->block, len);
        }

@@ -40,7 +68,37 @@ void FileReassembler::BlockInserted(DataBlock* start_block)

void FileReassembler::Undelivered(uint64 up_to_seq)
    {
-   // Not doing anything here yet.
+   // If we have blocks that begin below up_to_seq, deliver them.
+   DataBlock* b = blocks;
+
+   while ( b )
+       {
+       if ( b->seq < last_reassem_seq )
+           {
+           // Already delivered this block.
+           b = b->next;
+           continue;
+           }
+
+       if ( b->seq >= up_to_seq )
+           // Block is beyond what we need to process at this point.
+           break;
+
+       uint64 gap_at_seq = last_reassem_seq;
+       uint64 gap_len = b->seq - last_reassem_seq;
+       the_file->Gap(gap_at_seq, gap_len);
+       last_reassem_seq += gap_len;
+       BlockInserted(b);
+       // Inserting a block may cause trimming of what's buffered,
+       // so have to assume 'b' is invalid, hence re-assign to start.
+       b = blocks;
+       }
+
+   if ( up_to_seq > last_reassem_seq )
+       {
+       the_file->Gap(last_reassem_seq, up_to_seq - last_reassem_seq);
+       last_reassem_seq = up_to_seq;
+       }
    }

void FileReassembler::Overlap(const u_char* b1, const u_char* b2, uint64 n)

@@ -11,9 +11,6 @@ namespace file_analysis {

class File;

-//const int STOP_ON_GAP = 1;
-//const int PUNT_ON_PARTIAL = 1;
-
class FileReassembler : public Reassembler {
public:

@@ -21,12 +18,35 @@ public:
    virtual ~FileReassembler();

    void Done();
-   uint64 GetFirstBlockOffset() { return blocks->seq; }
-
    // Checks if we have delivered all contents that we can possibly
    // deliver for this endpoint.
    void CheckEOF();

+   /**
+    * Discards all contents of the reassembly buffer. This will spin through
+    * the buffer and call File::DeliverStream() and File::Gap() wherever
+    * appropriate.
+    * @return the number of new bytes now detected as gaps in the file.
+    */
+   uint64 Flush();
+
+   /**
+    * Discards all contents of the reassembly buffer up to a given sequence
+    * number. This will spin through the buffer and call
+    * File::DeliverStream() and File::Gap() wherever appropriate.
+    * @param sequence the sequence number to flush until.
+    * @return the number of new bytes now detected as gaps in the file.
+    */
+   uint64 FlushTo(uint64 sequence);
+
+   /**
+    * @return whether the reassembler is currently in the process of flushing
+    * out the contents of its buffer.
+    */
+   bool IsCurrentlyFlushing() const
+       { return flushing; }
+
protected:
    FileReassembler() { }

@@ -36,10 +56,8 @@ protected:
    void BlockInserted(DataBlock* b);
    void Overlap(const u_char* b1, const u_char* b2, uint64 n);

-   unsigned int had_gap:1;
-   unsigned int did_EOF:1;
-   unsigned int skip_deliveries:1;
    File* the_file;
+   bool flushing;
};

} // namespace analyzer::*

@@ -154,14 +154,12 @@ string Manager::DataIn(const u_char* data, uint64 len, analyzer::Tag tag,
void Manager::DataIn(const u_char* data, uint64 len, const string& file_id,
                     const string& source)
    {
-   File* file = GetFile(file_id);
+   File* file = GetFile(file_id, 0, analyzer::Tag::Error, false, false,
+                        source.c_str());

    if ( ! file )
        return;

-   if ( file->GetSource().empty() )
-       file->SetSource(source);
-
    file->DataIn(data, len);

    if ( file->IsComplete() )

@@ -299,7 +297,8 @@ bool Manager::RemoveAnalyzer(const string& file_id, file_analysis::Tag tag,
    }

File* Manager::GetFile(const string& file_id, Connection* conn,
-                       analyzer::Tag tag, bool is_orig, bool update_conn)
+                       analyzer::Tag tag, bool is_orig, bool update_conn,
+                       const char* source_name)
    {
    if ( file_id.empty() )
        return 0;

@@ -311,15 +310,18 @@ File* Manager::GetFile(const string& file_id, Connection* conn,

    if ( ! rval )
        {
-       rval = new File(file_id, conn, tag, is_orig);
+       rval = new File(file_id,
+                       source_name ? source_name
+                                   : analyzer_mgr->GetComponentName(tag),
+                       conn, tag, is_orig);
        id_map.Insert(file_id.c_str(), rval);
        rval->ScheduleInactivityTimer();

-       // Generate file_new here so the manager knows about the file.
+       // Generate file_new after inserting it into manager's mapping
+       // in case script-layer calls back in to core from the event.
        rval->FileEvent(file_new);
-       // Same for file_over_new_connection which is generated by
-       // updating the connection fields.
-       rval->UpdateConnectionFields(conn, is_orig);
+       // Same for file_over_new_connection.
+       rval->RaiseFileOverNewConnection(conn, is_orig);

        if ( IsIgnored(file_id) )
            return 0;

@@ -328,8 +330,8 @@ File* Manager::GetFile(const string& file_id, Connection* conn,
        {
        rval->UpdateLastActivityTime();

-       if ( update_conn )
-           rval->UpdateConnectionFields(conn, is_orig);
+       if ( update_conn && rval->UpdateConnectionFields(conn, is_orig) )
+           rval->RaiseFileOverNewConnection(conn, is_orig);
        }

    return rval;

@@ -319,6 +319,7 @@ protected:
    * this file isn't related to a connection).
    * @param update_conn whether we need to update connection-related field
    *        in the \c fa_file record value associated with the file.
+   * @param an optional value of the source field to fill in.
    * @return the File object mapped to \a file_id or a null pointer if
    *         analysis is being ignored for the associated file. An File
    *         object may be created if a mapping doesn't exist, and if it did

@@ -327,7 +328,8 @@ protected:
    */
    File* GetFile(const string& file_id, Connection* conn = 0,
                  analyzer::Tag tag = analyzer::Tag::Error,
-                 bool is_orig = false, bool update_conn = true);
+                 bool is_orig = false, bool update_conn = true,
+                 const char* source_name = 0);

    /**
    * Try to retrieve a file that's being analyzed, using its identifier/hash.

@@ -68,7 +68,6 @@ static bool check_limit_exceeded(uint64 lim, uint64 depth, uint64 len, uint64* n
        }
    else if ( depth + len > lim )
        {
-       printf("exceeded the maximum extraction lenght depth: %llu len: %llu lim: %llu\n", depth, len, lim);
        *n = lim - depth;
        return true;
        }