mirror of
https://github.com/zeek/zeek.git
synced 2025-10-03 07:08:19 +00:00
API changes to file analysis mime type detection.
Removed "file_mime_type" and "file_mime_types" event, replacing them with a new event called "file_metadata_inferred". It has a record argument of type "inferred_file_metadata", which contains the mime type information that the earlier events used to supply. The idea here is that future extensions to the record with new metadata will be less likely to break user code than the alternatives (adding new events or new event parameters). Addresses BIT-1368.
This commit is contained in:
parent
bd1191c60b
commit
a55ce01ef3
20 changed files with 170 additions and 136 deletions
|
@ -53,31 +53,35 @@ int File::overflow_bytes_idx = -1;
|
|||
int File::timeout_interval_idx = -1;
|
||||
int File::bof_buffer_size_idx = -1;
|
||||
int File::bof_buffer_idx = -1;
|
||||
int File::meta_mime_type_idx = -1;
|
||||
int File::meta_mime_types_idx = -1;
|
||||
|
||||
void File::StaticInit()
|
||||
{
|
||||
if ( id_idx != -1 )
|
||||
return;
|
||||
|
||||
id_idx = Idx("id");
|
||||
parent_id_idx = Idx("parent_id");
|
||||
source_idx = Idx("source");
|
||||
is_orig_idx = Idx("is_orig");
|
||||
conns_idx = Idx("conns");
|
||||
last_active_idx = Idx("last_active");
|
||||
seen_bytes_idx = Idx("seen_bytes");
|
||||
total_bytes_idx = Idx("total_bytes");
|
||||
missing_bytes_idx = Idx("missing_bytes");
|
||||
overflow_bytes_idx = Idx("overflow_bytes");
|
||||
timeout_interval_idx = Idx("timeout_interval");
|
||||
bof_buffer_size_idx = Idx("bof_buffer_size");
|
||||
bof_buffer_idx = Idx("bof_buffer");
|
||||
id_idx = Idx("id", fa_file_type);
|
||||
parent_id_idx = Idx("parent_id", fa_file_type);
|
||||
source_idx = Idx("source", fa_file_type);
|
||||
is_orig_idx = Idx("is_orig", fa_file_type);
|
||||
conns_idx = Idx("conns", fa_file_type);
|
||||
last_active_idx = Idx("last_active", fa_file_type);
|
||||
seen_bytes_idx = Idx("seen_bytes", fa_file_type);
|
||||
total_bytes_idx = Idx("total_bytes", fa_file_type);
|
||||
missing_bytes_idx = Idx("missing_bytes", fa_file_type);
|
||||
overflow_bytes_idx = Idx("overflow_bytes", fa_file_type);
|
||||
timeout_interval_idx = Idx("timeout_interval", fa_file_type);
|
||||
bof_buffer_size_idx = Idx("bof_buffer_size", fa_file_type);
|
||||
bof_buffer_idx = Idx("bof_buffer", fa_file_type);
|
||||
meta_mime_type_idx = Idx("mime_type", inferred_file_metadata_type);
|
||||
meta_mime_types_idx = Idx("mime_types", inferred_file_metadata_type);
|
||||
}
|
||||
|
||||
File::File(const string& file_id, const string& source_name, Connection* conn,
|
||||
analyzer::Tag tag, bool is_orig)
|
||||
: id(file_id), val(0), file_reassembler(0), stream_offset(0),
|
||||
reassembly_max_buffer(0), did_mime_type(false),
|
||||
reassembly_max_buffer(0), did_metadata_inference(false),
|
||||
reassembly_enabled(false), postpone_timeout(false), done(false),
|
||||
analyzers(this)
|
||||
{
|
||||
|
@ -169,11 +173,13 @@ double File::LookupFieldDefaultInterval(int idx) const
|
|||
return rval;
|
||||
}
|
||||
|
||||
int File::Idx(const string& field)
|
||||
int File::Idx(const string& field, const RecordType* type)
|
||||
{
|
||||
int rval = fa_file_type->FieldOffset(field.c_str());
|
||||
int rval = type->FieldOffset(field.c_str());
|
||||
|
||||
if ( rval < 0 )
|
||||
reporter->InternalError("Unknown fa_file field: %s", field.c_str());
|
||||
reporter->InternalError("Unknown %s field: %s", type->GetName().c_str(),
|
||||
field.c_str());
|
||||
|
||||
return rval;
|
||||
}
|
||||
|
@ -281,48 +287,46 @@ void File::SetReassemblyBuffer(uint64 max)
|
|||
reassembly_max_buffer = max;
|
||||
}
|
||||
|
||||
bool File::DetectMIME()
|
||||
void File::InferMetadata()
|
||||
{
|
||||
did_mime_type = true;
|
||||
did_metadata_inference = true;
|
||||
|
||||
Val* bof_buffer_val = val->Lookup(bof_buffer_idx);
|
||||
|
||||
if ( ! bof_buffer_val )
|
||||
{
|
||||
if ( bof_buffer.size == 0 )
|
||||
return false;
|
||||
return;
|
||||
|
||||
BroString* bs = concatenate(bof_buffer.chunks);
|
||||
bof_buffer_val = new StringVal(bs);
|
||||
val->Assign(bof_buffer_idx, bof_buffer_val);
|
||||
}
|
||||
|
||||
if ( ! FileEventAvailable(file_metadata_inferred) )
|
||||
return;
|
||||
|
||||
RuleMatcher::MIME_Matches matches;
|
||||
const u_char* data = bof_buffer_val->AsString()->Bytes();
|
||||
uint64 len = bof_buffer_val->AsString()->Len();
|
||||
len = min(len, LookupFieldDefaultCount(bof_buffer_size_idx));
|
||||
file_mgr->DetectMIME(data, len, &matches);
|
||||
|
||||
if ( matches.empty() )
|
||||
return false;
|
||||
val_list* vl = new val_list();
|
||||
vl->append(val->Ref());
|
||||
RecordVal* meta = new RecordVal(inferred_file_metadata_type);
|
||||
vl->append(meta);
|
||||
|
||||
if ( FileEventAvailable(file_mime_type) )
|
||||
if ( ! matches.empty() )
|
||||
{
|
||||
val_list* vl = new val_list();
|
||||
vl->append(val->Ref());
|
||||
vl->append(new StringVal(*(matches.begin()->second.begin())));
|
||||
FileEvent(file_mime_type, vl);
|
||||
meta->Assign(meta_mime_type_idx,
|
||||
new StringVal(*(matches.begin()->second.begin())));
|
||||
meta->Assign(meta_mime_types_idx,
|
||||
file_analysis::GenMIMEMatchesVal(matches));
|
||||
}
|
||||
|
||||
if ( FileEventAvailable(file_mime_types) )
|
||||
{
|
||||
val_list* vl = new val_list();
|
||||
vl->append(val->Ref());
|
||||
vl->append(file_analysis::GenMIMEMatchesVal(matches));
|
||||
FileEvent(file_mime_types, vl);
|
||||
}
|
||||
|
||||
return true;
|
||||
FileEvent(file_metadata_inferred, vl);
|
||||
return;
|
||||
}
|
||||
|
||||
bool File::BufferBOF(const u_char* data, uint64 len)
|
||||
|
@ -355,9 +359,9 @@ void File::DeliverStream(const u_char* data, uint64 len)
|
|||
// Buffer enough data for the BOF buffer
|
||||
BufferBOF(data, len);
|
||||
|
||||
if ( ! did_mime_type && bof_buffer.full &&
|
||||
if ( ! did_metadata_inference && bof_buffer.full &&
|
||||
LookupFieldDefaultCount(missing_bytes_idx) == 0 )
|
||||
DetectMIME();
|
||||
InferMetadata();
|
||||
|
||||
DBG_LOG(DBG_FILE_ANALYSIS,
|
||||
"[%s] %" PRIu64 " stream bytes in at offset %" PRIu64 "; %s [%s%s]",
|
||||
|
@ -582,7 +586,7 @@ void File::FileEvent(EventHandlerPtr h, val_list* vl)
|
|||
mgr.QueueEvent(h, vl);
|
||||
|
||||
if ( h == file_new || h == file_over_new_connection ||
|
||||
h == file_mime_type ||
|
||||
h == file_metadata_inferred ||
|
||||
h == file_timeout || h == file_extraction_limit )
|
||||
{
|
||||
// immediate feedback is required for these events.
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue