Merge remote-tracking branch 'origin/topic/seth/more-file-type-ident-fixes'

* origin/topic/seth/more-file-type-ident-fixes:
  File API updates complete.
  Fixes for file type identification.
  API changes to file analysis mime type detection.
  Make HTTP 206 reassembly require ETags by default.
  More file type identification improvements
  Fix an issue with files having gaps before the bof_buffer is filled.
  Fix an issue with packet loss in http file reporting.
  Adding WOFF fonts to file type identification.
  Extended JSON matching and added OCSP responses.
  Another large signature update.
  More signature updates.
  Even more file type ident clean up.
  Lots of fixes for file type identification.

BIT-1368 #merged
This commit is contained in:
Robin Sommer 2015-04-20 13:12:39 -07:00
commit ed91732e09
50 changed files with 1402 additions and 2755 deletions

View file

@ -53,32 +53,36 @@ int File::overflow_bytes_idx = -1;
// Cached record-field offsets. Each starts at -1 and is assigned by
// File::StaticInit(): the first three from fields of fa_file_type, the
// two meta_* ones from fields of fa_metadata_type.
int File::timeout_interval_idx = -1;
int File::bof_buffer_size_idx = -1;
int File::bof_buffer_idx = -1;
int File::meta_mime_type_idx = -1;
int File::meta_mime_types_idx = -1;
// Looks up and caches the record-field offsets that File uses.
//
// NOTE(review): this span comes from a rendered diff without +/- markers,
// so the one-argument Idx("...") calls and the two-argument
// Idx("...", type) calls both appear. Presumably the one-argument calls are
// the removed lines -- confirm against the actual file before editing.
void File::StaticInit()
{
// id_idx is the first offset assigned below; a value other than -1 means
// the lookups were already done, so there is nothing left to do.
if ( id_idx != -1 )
return;
id_idx = Idx("id");
parent_id_idx = Idx("parent_id");
source_idx = Idx("source");
is_orig_idx = Idx("is_orig");
conns_idx = Idx("conns");
last_active_idx = Idx("last_active");
seen_bytes_idx = Idx("seen_bytes");
total_bytes_idx = Idx("total_bytes");
missing_bytes_idx = Idx("missing_bytes");
overflow_bytes_idx = Idx("overflow_bytes");
timeout_interval_idx = Idx("timeout_interval");
bof_buffer_size_idx = Idx("bof_buffer_size");
bof_buffer_idx = Idx("bof_buffer");
// Same field names as above, now resolved explicitly against fa_file_type.
id_idx = Idx("id", fa_file_type);
parent_id_idx = Idx("parent_id", fa_file_type);
source_idx = Idx("source", fa_file_type);
is_orig_idx = Idx("is_orig", fa_file_type);
conns_idx = Idx("conns", fa_file_type);
last_active_idx = Idx("last_active", fa_file_type);
seen_bytes_idx = Idx("seen_bytes", fa_file_type);
total_bytes_idx = Idx("total_bytes", fa_file_type);
missing_bytes_idx = Idx("missing_bytes", fa_file_type);
overflow_bytes_idx = Idx("overflow_bytes", fa_file_type);
timeout_interval_idx = Idx("timeout_interval", fa_file_type);
bof_buffer_size_idx = Idx("bof_buffer_size", fa_file_type);
bof_buffer_idx = Idx("bof_buffer", fa_file_type);
// These two offsets are resolved against fa_metadata_type, not fa_file_type.
meta_mime_type_idx = Idx("mime_type", fa_metadata_type);
meta_mime_types_idx = Idx("mime_types", fa_metadata_type);
}
File::File(const string& file_id, const string& source_name, Connection* conn,
analyzer::Tag tag, bool is_orig)
: id(file_id), val(0), file_reassembler(0), stream_offset(0),
reassembly_max_buffer(0), did_mime_type(false),
reassembly_enabled(false), postpone_timeout(false), done(false),
: id(file_id), val(0), file_reassembler(0), stream_offset(0),
reassembly_max_buffer(0), did_metadata_inference(false),
reassembly_enabled(false), postpone_timeout(false), done(false),
analyzers(this)
{
StaticInit();
@ -169,11 +173,13 @@ double File::LookupFieldDefaultInterval(int idx) const
return rval;
}
// Returns the offset of the field named `field` as reported by
// FieldOffset() on a record type. A negative offset is reported through
// reporter->InternalError() before the value is returned.
//
// NOTE(review): this span comes from a rendered diff without +/- markers, so
// two signatures, two FieldOffset() lookups and two InternalError() calls
// appear. Presumably the variants hard-wired to fa_file_type are the removed
// lines and the ones taking `type` are the added lines -- confirm.
int File::Idx(const string& field)
int File::Idx(const string& field, const RecordType* type)
{
int rval = fa_file_type->FieldOffset(field.c_str());
int rval = type->FieldOffset(field.c_str());
if ( rval < 0 )
reporter->InternalError("Unknown fa_file field: %s", field.c_str());
// This variant names the record type in the message via type->GetName().
reporter->InternalError("Unknown %s field: %s", type->GetName().c_str(),
field.c_str());
return rval;
}
@ -281,48 +287,46 @@ void File::SetReassemblyBuffer(uint64 max)
reassembly_max_buffer = max;
}
// Runs MIME matching over the buffered beginning of the file and passes the
// outcome to an event through FileEvent().
//
// NOTE(review): this span comes from a rendered diff without +/- markers, so
// two function headers and two interleaved bodies appear. Presumably the
// bool DetectMIME() lines (did_mime_type, `return false/true`, the separate
// file_mime_type / file_mime_types events) are the removed version and the
// void InferMetadata() lines (did_metadata_inference, the fa_metadata_type
// record, the single file_sniff event) are the added version -- confirm
// against the actual file before editing.
bool File::DetectMIME()
void File::InferMetadata()
{
// Set unconditionally on entry, before any of the early returns below.
did_mime_type = true;
did_metadata_inference = true;
// Use the bof_buffer value already stored on the record if there is one;
// otherwise build it from the buffered chunks and store it on the record.
Val* bof_buffer_val = val->Lookup(bof_buffer_idx);
if ( ! bof_buffer_val )
{
// Nothing has been buffered, so there is no data to match against.
if ( bof_buffer.size == 0 )
return false;
return;
BroString* bs = concatenate(bof_buffer.chunks);
bof_buffer_val = new StringVal(bs);
val->Assign(bof_buffer_idx, bof_buffer_val);
}
// Skip the matching work when FileEventAvailable() reports that file_sniff
// is not available.
if ( ! FileEventAvailable(file_sniff) )
return;
RuleMatcher::MIME_Matches matches;
const u_char* data = bof_buffer_val->AsString()->Bytes();
uint64 len = bof_buffer_val->AsString()->Len();
// Cap the inspected length at the value LookupFieldDefaultCount() yields
// for the bof_buffer_size field.
len = min(len, LookupFieldDefaultCount(bof_buffer_size_idx));
file_mgr->DetectMIME(data, len, &matches);
if ( matches.empty() )
return false;
// Event arguments: a reference to this file's record value, followed by a
// newly created fa_metadata_type record.
val_list* vl = new val_list();
vl->append(val->Ref());
RecordVal* meta = new RecordVal(fa_metadata_type);
vl->append(meta);
if ( FileEventAvailable(file_mime_type) )
if ( ! matches.empty() )
{
val_list* vl = new val_list();
vl->append(val->Ref());
vl->append(new StringVal(*(matches.begin()->second.begin())));
FileEvent(file_mime_type, vl);
// mime_type takes the first string of the first entry in `matches`;
// mime_types takes all matches as converted by GenMIMEMatchesVal().
meta->Assign(meta_mime_type_idx,
new StringVal(*(matches.begin()->second.begin())));
meta->Assign(meta_mime_types_idx,
file_analysis::GenMIMEMatchesVal(matches));
}
if ( FileEventAvailable(file_mime_types) )
{
val_list* vl = new val_list();
vl->append(val->Ref());
vl->append(file_analysis::GenMIMEMatchesVal(matches));
FileEvent(file_mime_types, vl);
}
return true;
// The file_sniff call sits outside the `! matches.empty()` block, so the
// metadata record is passed on even when nothing was assigned to it.
FileEvent(file_sniff, vl);
return;
}
bool File::BufferBOF(const u_char* data, uint64 len)
@ -355,9 +359,9 @@ void File::DeliverStream(const u_char* data, uint64 len)
// Buffer enough data for the BOF buffer
BufferBOF(data, len);
if ( ! did_mime_type && bof_buffer.full &&
if ( ! did_metadata_inference && bof_buffer.full &&
LookupFieldDefaultCount(missing_bytes_idx) == 0 )
DetectMIME();
InferMetadata();
DBG_LOG(DBG_FILE_ANALYSIS,
"[%s] %" PRIu64 " stream bytes in at offset %" PRIu64 "; %s [%s%s]",
@ -438,7 +442,7 @@ void File::DeliverChunk(const u_char* data, uint64 len, uint64 offset)
}
else if ( reassembly_enabled )
{
// This is data that doesn't match the offset and the reassembler
// This is data that doesn't match the offset and the reassembler
// needs to be enabled.
file_reassembler = new FileReassembler(this, stream_offset);
file_reassembler->NewBlock(network_time, offset, len, data);
@ -502,10 +506,10 @@ void File::EndOfFile()
// any stream analyzers.
if ( ! bof_buffer.full )
{
DBG_LOG(DBG_FILE_ANALYSIS, "[%s] File over but bof_buffer not full.", id.c_str());
bof_buffer.full = true;
DeliverStream((const u_char*) "", 0);
}
analyzers.DrainModifications();
done = true;
@ -536,7 +540,12 @@ void File::Gap(uint64 offset, uint64 len)
return;
}
analyzers.DrainModifications();
if ( ! bof_buffer.full )
{
DBG_LOG(DBG_FILE_ANALYSIS, "[%s] File gap before bof_buffer filled, continued without attempting to fill bof_buffer.", id.c_str());
bof_buffer.full = true;
DeliverStream((const u_char*) "", 0);
}
file_analysis::Analyzer* a = 0;
IterCookie* c = analyzers.InitForIteration();
@ -582,7 +591,7 @@ void File::FileEvent(EventHandlerPtr h, val_list* vl)
mgr.QueueEvent(h, vl);
if ( h == file_new || h == file_over_new_connection ||
h == file_mime_type ||
h == file_sniff ||
h == file_timeout || h == file_extraction_limit )
{
// immediate feedback is required for these events.