mirror of
https://github.com/zeek/zeek.git
synced 2025-10-04 15:48:19 +00:00
Merge remote-tracking branch 'origin/topic/seth/more-file-type-ident-fixes'
* origin/topic/seth/more-file-type-ident-fixes:
  File API updates complete.
  Fixes for file type identification.
  API changes to file analysis mime type detection.
  Make HTTP 206 reassembly require ETags by default.
  More file type identification improvements.
  Fix an issue with files having gaps before the bof_buffer is filled.
  Fix an issue with packet loss in http file reporting.
  Adding WOFF fonts to file type identification.
  Extended JSON matching and added OCSP responses.
  Another large signature update.
  More signature updates.
  Even more file type ident clean up.
  Lots of fixes for file type identification.

BIT-1368 #merged
This commit is contained in:
commit
ed91732e09
50 changed files with 1402 additions and 2755 deletions
|
@ -52,9 +52,10 @@ bool AnalyzerSet::Add(file_analysis::Tag tag, RecordVal* args)
|
|||
|
||||
if ( analyzer_map.Lookup(key) )
|
||||
{
|
||||
DBG_LOG(DBG_FILE_ANALYSIS, "Instantiate analyzer %s skipped for file id"
|
||||
" %s: already exists", file_mgr->GetComponentName(tag).c_str(),
|
||||
file->GetID().c_str());
|
||||
DBG_LOG(DBG_FILE_ANALYSIS, "[%s] Instantiate analyzer %s skipped: already exists",
|
||||
file->GetID().c_str(),
|
||||
file_mgr->GetComponentName(tag).c_str());
|
||||
|
||||
delete key;
|
||||
return true;
|
||||
}
|
||||
|
@ -92,9 +93,9 @@ bool AnalyzerSet::AddMod::Perform(AnalyzerSet* set)
|
|||
{
|
||||
if ( set->analyzer_map.Lookup(key) )
|
||||
{
|
||||
DBG_LOG(DBG_FILE_ANALYSIS, "Add analyzer %s skipped for file id"
|
||||
" %s: already exists", file_mgr->GetComponentName(a->Tag()).c_str(),
|
||||
a->GetFile()->GetID().c_str());
|
||||
DBG_LOG(DBG_FILE_ANALYSIS, "[%s] Add analyzer %s skipped: already exists",
|
||||
a->GetFile()->GetID().c_str(),
|
||||
file_mgr->GetComponentName(a->Tag()).c_str());
|
||||
|
||||
Abort();
|
||||
return true;
|
||||
|
@ -119,14 +120,14 @@ bool AnalyzerSet::Remove(file_analysis::Tag tag, HashKey* key)
|
|||
|
||||
if ( ! a )
|
||||
{
|
||||
DBG_LOG(DBG_FILE_ANALYSIS, "Skip remove analyzer %s for file id %s",
|
||||
file_mgr->GetComponentName(tag).c_str(), file->GetID().c_str());
|
||||
DBG_LOG(DBG_FILE_ANALYSIS, "[%s] Skip remove analyzer %s",
|
||||
file->GetID().c_str(), file_mgr->GetComponentName(tag).c_str());
|
||||
return false;
|
||||
}
|
||||
|
||||
DBG_LOG(DBG_FILE_ANALYSIS, "Remove analyzer %s for file id %s",
|
||||
file_mgr->GetComponentName(tag).c_str(),
|
||||
file->GetID().c_str());
|
||||
DBG_LOG(DBG_FILE_ANALYSIS, "[%s] Remove analyzer %s",
|
||||
file->GetID().c_str(),
|
||||
file_mgr->GetComponentName(tag).c_str());
|
||||
|
||||
a->Done();
|
||||
delete a;
|
||||
|
@ -168,8 +169,9 @@ file_analysis::Analyzer* AnalyzerSet::InstantiateAnalyzer(Tag tag,
|
|||
|
||||
if ( ! a )
|
||||
{
|
||||
reporter->Error("Failed file analyzer %s instantiation for file id %s",
|
||||
file_mgr->GetComponentName(tag).c_str(), file->GetID().c_str());
|
||||
reporter->Error("[%s] Failed file analyzer %s instantiation",
|
||||
file->GetID().c_str(),
|
||||
file_mgr->GetComponentName(tag).c_str());
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -178,8 +180,8 @@ file_analysis::Analyzer* AnalyzerSet::InstantiateAnalyzer(Tag tag,
|
|||
|
||||
void AnalyzerSet::Insert(file_analysis::Analyzer* a, HashKey* key)
|
||||
{
|
||||
DBG_LOG(DBG_FILE_ANALYSIS, "Add analyzer %s for file id %s",
|
||||
file_mgr->GetComponentName(a->Tag()).c_str(), file->GetID().c_str());
|
||||
DBG_LOG(DBG_FILE_ANALYSIS, "[%s] Add analyzer %s",
|
||||
file->GetID().c_str(), file_mgr->GetComponentName(a->Tag()).c_str());
|
||||
analyzer_map.Insert(key, a);
|
||||
delete key;
|
||||
|
||||
|
@ -191,7 +193,7 @@ void AnalyzerSet::DrainModifications()
|
|||
if ( mod_queue.empty() )
|
||||
return;
|
||||
|
||||
DBG_LOG(DBG_FILE_ANALYSIS, "Start analyzer mod queue flush of file id %s",
|
||||
DBG_LOG(DBG_FILE_ANALYSIS, "[%s] Start analyzer mod queue flush",
|
||||
file->GetID().c_str());
|
||||
do
|
||||
{
|
||||
|
@ -200,6 +202,6 @@ void AnalyzerSet::DrainModifications()
|
|||
delete mod;
|
||||
mod_queue.pop();
|
||||
} while ( ! mod_queue.empty() );
|
||||
DBG_LOG(DBG_FILE_ANALYSIS, "End flushing analyzer mod queue of file id %s",
|
||||
DBG_LOG(DBG_FILE_ANALYSIS, "[%s] End flushing analyzer mod queue.",
|
||||
file->GetID().c_str());
|
||||
}
|
||||
|
|
|
@ -53,32 +53,36 @@ int File::overflow_bytes_idx = -1;
|
|||
int File::timeout_interval_idx = -1;
|
||||
int File::bof_buffer_size_idx = -1;
|
||||
int File::bof_buffer_idx = -1;
|
||||
int File::meta_mime_type_idx = -1;
|
||||
int File::meta_mime_types_idx = -1;
|
||||
|
||||
void File::StaticInit()
|
||||
{
|
||||
if ( id_idx != -1 )
|
||||
return;
|
||||
|
||||
id_idx = Idx("id");
|
||||
parent_id_idx = Idx("parent_id");
|
||||
source_idx = Idx("source");
|
||||
is_orig_idx = Idx("is_orig");
|
||||
conns_idx = Idx("conns");
|
||||
last_active_idx = Idx("last_active");
|
||||
seen_bytes_idx = Idx("seen_bytes");
|
||||
total_bytes_idx = Idx("total_bytes");
|
||||
missing_bytes_idx = Idx("missing_bytes");
|
||||
overflow_bytes_idx = Idx("overflow_bytes");
|
||||
timeout_interval_idx = Idx("timeout_interval");
|
||||
bof_buffer_size_idx = Idx("bof_buffer_size");
|
||||
bof_buffer_idx = Idx("bof_buffer");
|
||||
id_idx = Idx("id", fa_file_type);
|
||||
parent_id_idx = Idx("parent_id", fa_file_type);
|
||||
source_idx = Idx("source", fa_file_type);
|
||||
is_orig_idx = Idx("is_orig", fa_file_type);
|
||||
conns_idx = Idx("conns", fa_file_type);
|
||||
last_active_idx = Idx("last_active", fa_file_type);
|
||||
seen_bytes_idx = Idx("seen_bytes", fa_file_type);
|
||||
total_bytes_idx = Idx("total_bytes", fa_file_type);
|
||||
missing_bytes_idx = Idx("missing_bytes", fa_file_type);
|
||||
overflow_bytes_idx = Idx("overflow_bytes", fa_file_type);
|
||||
timeout_interval_idx = Idx("timeout_interval", fa_file_type);
|
||||
bof_buffer_size_idx = Idx("bof_buffer_size", fa_file_type);
|
||||
bof_buffer_idx = Idx("bof_buffer", fa_file_type);
|
||||
meta_mime_type_idx = Idx("mime_type", fa_metadata_type);
|
||||
meta_mime_types_idx = Idx("mime_types", fa_metadata_type);
|
||||
}
|
||||
|
||||
File::File(const string& file_id, const string& source_name, Connection* conn,
|
||||
analyzer::Tag tag, bool is_orig)
|
||||
: id(file_id), val(0), file_reassembler(0), stream_offset(0),
|
||||
reassembly_max_buffer(0), did_mime_type(false),
|
||||
reassembly_enabled(false), postpone_timeout(false), done(false),
|
||||
: id(file_id), val(0), file_reassembler(0), stream_offset(0),
|
||||
reassembly_max_buffer(0), did_metadata_inference(false),
|
||||
reassembly_enabled(false), postpone_timeout(false), done(false),
|
||||
analyzers(this)
|
||||
{
|
||||
StaticInit();
|
||||
|
@ -169,11 +173,13 @@ double File::LookupFieldDefaultInterval(int idx) const
|
|||
return rval;
|
||||
}
|
||||
|
||||
int File::Idx(const string& field)
|
||||
int File::Idx(const string& field, const RecordType* type)
|
||||
{
|
||||
int rval = fa_file_type->FieldOffset(field.c_str());
|
||||
int rval = type->FieldOffset(field.c_str());
|
||||
|
||||
if ( rval < 0 )
|
||||
reporter->InternalError("Unknown fa_file field: %s", field.c_str());
|
||||
reporter->InternalError("Unknown %s field: %s", type->GetName().c_str(),
|
||||
field.c_str());
|
||||
|
||||
return rval;
|
||||
}
|
||||
|
@ -281,48 +287,46 @@ void File::SetReassemblyBuffer(uint64 max)
|
|||
reassembly_max_buffer = max;
|
||||
}
|
||||
|
||||
bool File::DetectMIME()
|
||||
void File::InferMetadata()
|
||||
{
|
||||
did_mime_type = true;
|
||||
did_metadata_inference = true;
|
||||
|
||||
Val* bof_buffer_val = val->Lookup(bof_buffer_idx);
|
||||
|
||||
if ( ! bof_buffer_val )
|
||||
{
|
||||
if ( bof_buffer.size == 0 )
|
||||
return false;
|
||||
return;
|
||||
|
||||
BroString* bs = concatenate(bof_buffer.chunks);
|
||||
bof_buffer_val = new StringVal(bs);
|
||||
val->Assign(bof_buffer_idx, bof_buffer_val);
|
||||
}
|
||||
|
||||
if ( ! FileEventAvailable(file_sniff) )
|
||||
return;
|
||||
|
||||
RuleMatcher::MIME_Matches matches;
|
||||
const u_char* data = bof_buffer_val->AsString()->Bytes();
|
||||
uint64 len = bof_buffer_val->AsString()->Len();
|
||||
len = min(len, LookupFieldDefaultCount(bof_buffer_size_idx));
|
||||
file_mgr->DetectMIME(data, len, &matches);
|
||||
|
||||
if ( matches.empty() )
|
||||
return false;
|
||||
val_list* vl = new val_list();
|
||||
vl->append(val->Ref());
|
||||
RecordVal* meta = new RecordVal(fa_metadata_type);
|
||||
vl->append(meta);
|
||||
|
||||
if ( FileEventAvailable(file_mime_type) )
|
||||
if ( ! matches.empty() )
|
||||
{
|
||||
val_list* vl = new val_list();
|
||||
vl->append(val->Ref());
|
||||
vl->append(new StringVal(*(matches.begin()->second.begin())));
|
||||
FileEvent(file_mime_type, vl);
|
||||
meta->Assign(meta_mime_type_idx,
|
||||
new StringVal(*(matches.begin()->second.begin())));
|
||||
meta->Assign(meta_mime_types_idx,
|
||||
file_analysis::GenMIMEMatchesVal(matches));
|
||||
}
|
||||
|
||||
if ( FileEventAvailable(file_mime_types) )
|
||||
{
|
||||
val_list* vl = new val_list();
|
||||
vl->append(val->Ref());
|
||||
vl->append(file_analysis::GenMIMEMatchesVal(matches));
|
||||
FileEvent(file_mime_types, vl);
|
||||
}
|
||||
|
||||
return true;
|
||||
FileEvent(file_sniff, vl);
|
||||
return;
|
||||
}
|
||||
|
||||
bool File::BufferBOF(const u_char* data, uint64 len)
|
||||
|
@ -355,9 +359,9 @@ void File::DeliverStream(const u_char* data, uint64 len)
|
|||
// Buffer enough data for the BOF buffer
|
||||
BufferBOF(data, len);
|
||||
|
||||
if ( ! did_mime_type && bof_buffer.full &&
|
||||
if ( ! did_metadata_inference && bof_buffer.full &&
|
||||
LookupFieldDefaultCount(missing_bytes_idx) == 0 )
|
||||
DetectMIME();
|
||||
InferMetadata();
|
||||
|
||||
DBG_LOG(DBG_FILE_ANALYSIS,
|
||||
"[%s] %" PRIu64 " stream bytes in at offset %" PRIu64 "; %s [%s%s]",
|
||||
|
@ -438,7 +442,7 @@ void File::DeliverChunk(const u_char* data, uint64 len, uint64 offset)
|
|||
}
|
||||
else if ( reassembly_enabled )
|
||||
{
|
||||
// This is data that doesn't match the offset and the reassembler
|
||||
// This is data that doesn't match the offset and the reassembler
|
||||
// needs to be enabled.
|
||||
file_reassembler = new FileReassembler(this, stream_offset);
|
||||
file_reassembler->NewBlock(network_time, offset, len, data);
|
||||
|
@ -502,10 +506,10 @@ void File::EndOfFile()
|
|||
// any stream analyzers.
|
||||
if ( ! bof_buffer.full )
|
||||
{
|
||||
DBG_LOG(DBG_FILE_ANALYSIS, "[%s] File over but bof_buffer not full.", id.c_str());
|
||||
bof_buffer.full = true;
|
||||
DeliverStream((const u_char*) "", 0);
|
||||
}
|
||||
|
||||
analyzers.DrainModifications();
|
||||
|
||||
done = true;
|
||||
|
@ -536,7 +540,12 @@ void File::Gap(uint64 offset, uint64 len)
|
|||
return;
|
||||
}
|
||||
|
||||
analyzers.DrainModifications();
|
||||
if ( ! bof_buffer.full )
|
||||
{
|
||||
DBG_LOG(DBG_FILE_ANALYSIS, "[%s] File gap before bof_buffer filled, continued without attempting to fill bof_buffer.", id.c_str());
|
||||
bof_buffer.full = true;
|
||||
DeliverStream((const u_char*) "", 0);
|
||||
}
|
||||
|
||||
file_analysis::Analyzer* a = 0;
|
||||
IterCookie* c = analyzers.InitForIteration();
|
||||
|
@ -582,7 +591,7 @@ void File::FileEvent(EventHandlerPtr h, val_list* vl)
|
|||
mgr.QueueEvent(h, vl);
|
||||
|
||||
if ( h == file_new || h == file_over_new_connection ||
|
||||
h == file_mime_type ||
|
||||
h == file_sniff ||
|
||||
h == file_timeout || h == file_extraction_limit )
|
||||
{
|
||||
// immediate feedback is required for these events.
|
||||
|
|
|
@ -230,12 +230,11 @@ protected:
|
|||
bool BufferBOF(const u_char* data, uint64 len);
|
||||
|
||||
/**
|
||||
* Does mime type detection via file magic signatures and assigns
|
||||
* strongest matching mime type (if available) to \c mime_type
|
||||
* field in #val. It uses the data in the BOF buffer.
|
||||
* @return whether a mime type match was found.
|
||||
* Does metadata inference (e.g. mime type detection via file
|
||||
* magic signatures) using data in the BOF (beginning-of-file) buffer
|
||||
* and raises an event with the metadata.
|
||||
*/
|
||||
bool DetectMIME();
|
||||
void InferMetadata();
|
||||
|
||||
/**
|
||||
* Enables reassembly on the file.
|
||||
|
@ -266,10 +265,11 @@ protected:
|
|||
|
||||
/**
|
||||
* Lookup a record field index/offset by name.
|
||||
* @param field_name the name of the \c fa_file record field.
|
||||
* @param field_name the name of the record field.
|
||||
* @param type the record type for which the field will be looked up.
|
||||
* @return the field offset within record type \a type corresponding to \a field_name.
|
||||
*/
|
||||
static int Idx(const string& field_name);
|
||||
static int Idx(const string& field_name, const RecordType* type);
|
||||
|
||||
/**
|
||||
* Initializes static members.
|
||||
|
@ -282,7 +282,7 @@ protected:
|
|||
FileReassembler* file_reassembler; /**< A reassembler for the file if it's needed. */
|
||||
uint64 stream_offset; /**< The offset of the file which has been forwarded. */
|
||||
uint64 reassembly_max_buffer; /**< Maximum allowed buffer for reassembly. */
|
||||
bool did_mime_type; /**< Whether the mime type ident has already been attempted. */
|
||||
bool did_metadata_inference; /**< Whether the metadata inference has already been attempted. */
|
||||
bool reassembly_enabled; /**< Whether file stream reassembly is needed. */
|
||||
bool postpone_timeout; /**< Whether postponing timeout is requested. */
|
||||
bool done; /**< If this object is about to be deleted. */
|
||||
|
@ -313,6 +313,9 @@ protected:
|
|||
static int bof_buffer_idx;
|
||||
static int mime_type_idx;
|
||||
static int mime_types_idx;
|
||||
|
||||
static int meta_mime_type_idx;
|
||||
static int meta_mime_types_idx;
|
||||
};
|
||||
|
||||
} // namespace file_analysis
|
||||
|
|
|
@ -390,7 +390,7 @@ bool Manager::RemoveFile(const string& file_id)
|
|||
if ( ! f )
|
||||
return false;
|
||||
|
||||
DBG_LOG(DBG_FILE_ANALYSIS, "Remove FileID %s", file_id.c_str());
|
||||
DBG_LOG(DBG_FILE_ANALYSIS, "[%s] Remove file", file_id.c_str());
|
||||
|
||||
f->EndOfFile();
|
||||
delete f;
|
||||
|
@ -467,8 +467,8 @@ Analyzer* Manager::InstantiateAnalyzer(Tag tag, RecordVal* args, File* f) const
|
|||
return 0;
|
||||
}
|
||||
|
||||
DBG_LOG(DBG_FILE_ANALYSIS, "Instantiate analyzer %s for file %s",
|
||||
GetComponentName(tag).c_str(), f->id.c_str());
|
||||
DBG_LOG(DBG_FILE_ANALYSIS, "[%s] Instantiate analyzer %s",
|
||||
f->id.c_str(), GetComponentName(tag).c_str());
|
||||
|
||||
Analyzer* a = c->Factory()(args, f);
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue