Lots of fixes for file type identification.

 - Plain text is now identified by BOMs for UTF-8, UTF-16, and UTF-32
   (even though UTF-16 and UTF-32 wouldn't get identified as plain text, oh well).
 - X.509 certificates now populate files.log with
   the MIME type application/pkix-cert.
 - File signatures are split apart into file types
   to help group and organize signatures a bit better.
 - Normalized some FILE_ANALYSIS debug messages.
 - Improved JavaScript detection.
 - Improved HTML detection.
 - Removed a bunch of bad signatures.
 - Merged a bunch of signatures that ultimately detected
   the same MIME type.
 - Added detection for MS LNK files.
 - Added detection for cross-domain-policy XML files.
 - Added detection for SOAP envelopes.
This commit is contained in:
Seth Hall 2015-03-13 22:14:44 -04:00
parent c56df225b0
commit ee3e885712
14 changed files with 750 additions and 1894 deletions

View file

@ -52,9 +52,10 @@ bool AnalyzerSet::Add(file_analysis::Tag tag, RecordVal* args)
if ( analyzer_map.Lookup(key) )
{
DBG_LOG(DBG_FILE_ANALYSIS, "Instantiate analyzer %s skipped for file id"
" %s: already exists", file_mgr->GetComponentName(tag).c_str(),
file->GetID().c_str());
DBG_LOG(DBG_FILE_ANALYSIS, "[%s] Instantiate analyzer %s skipped: already exists",
file->GetID().c_str(),
file_mgr->GetComponentName(tag).c_str());
delete key;
return true;
}
@ -92,9 +93,9 @@ bool AnalyzerSet::AddMod::Perform(AnalyzerSet* set)
{
if ( set->analyzer_map.Lookup(key) )
{
DBG_LOG(DBG_FILE_ANALYSIS, "Add analyzer %s skipped for file id"
" %s: already exists", file_mgr->GetComponentName(a->Tag()).c_str(),
a->GetFile()->GetID().c_str());
DBG_LOG(DBG_FILE_ANALYSIS, "[%s] Add analyzer %s skipped: already exists",
a->GetFile()->GetID().c_str(),
file_mgr->GetComponentName(a->Tag()).c_str());
Abort();
return true;
@ -119,14 +120,14 @@ bool AnalyzerSet::Remove(file_analysis::Tag tag, HashKey* key)
if ( ! a )
{
DBG_LOG(DBG_FILE_ANALYSIS, "Skip remove analyzer %s for file id %s",
file_mgr->GetComponentName(tag).c_str(), file->GetID().c_str());
DBG_LOG(DBG_FILE_ANALYSIS, "[%s] Skip remove analyzer %s",
file->GetID().c_str(), file_mgr->GetComponentName(tag).c_str());
return false;
}
DBG_LOG(DBG_FILE_ANALYSIS, "Remove analyzer %s for file id %s",
file_mgr->GetComponentName(tag).c_str(),
file->GetID().c_str());
DBG_LOG(DBG_FILE_ANALYSIS, "[%s] Remove analyzer %s",
file->GetID().c_str(),
file_mgr->GetComponentName(tag).c_str());
a->Done();
delete a;
@ -168,8 +169,9 @@ file_analysis::Analyzer* AnalyzerSet::InstantiateAnalyzer(Tag tag,
if ( ! a )
{
reporter->Error("Failed file analyzer %s instantiation for file id %s",
file_mgr->GetComponentName(tag).c_str(), file->GetID().c_str());
reporter->Error("[%s] Failed file analyzer %s instantiation",
file->GetID().c_str(),
file_mgr->GetComponentName(tag).c_str());
return 0;
}
@ -178,8 +180,8 @@ file_analysis::Analyzer* AnalyzerSet::InstantiateAnalyzer(Tag tag,
void AnalyzerSet::Insert(file_analysis::Analyzer* a, HashKey* key)
{
DBG_LOG(DBG_FILE_ANALYSIS, "Add analyzer %s for file id %s",
file_mgr->GetComponentName(a->Tag()).c_str(), file->GetID().c_str());
DBG_LOG(DBG_FILE_ANALYSIS, "[%s] Add analyzer %s",
file->GetID().c_str(), file_mgr->GetComponentName(a->Tag()).c_str());
analyzer_map.Insert(key, a);
delete key;
@ -191,7 +193,7 @@ void AnalyzerSet::DrainModifications()
if ( mod_queue.empty() )
return;
DBG_LOG(DBG_FILE_ANALYSIS, "Start analyzer mod queue flush of file id %s",
DBG_LOG(DBG_FILE_ANALYSIS, "[%s] Start analyzer mod queue flush",
file->GetID().c_str());
do
{
@ -200,6 +202,6 @@ void AnalyzerSet::DrainModifications()
delete mod;
mod_queue.pop();
} while ( ! mod_queue.empty() );
DBG_LOG(DBG_FILE_ANALYSIS, "End flushing analyzer mod queue of file id %s",
DBG_LOG(DBG_FILE_ANALYSIS, "[%s] End flushing analyzer mod queue.",
file->GetID().c_str());
}

View file

@ -304,7 +304,9 @@ bool File::DetectMIME()
file_mgr->DetectMIME(data, len, &matches);
if ( matches.empty() )
{
return false;
}
if ( FileEventAvailable(file_mime_type) )
{
@ -502,10 +504,10 @@ void File::EndOfFile()
// any stream analyzers.
if ( ! bof_buffer.full )
{
DBG_LOG(DBG_FILE_ANALYSIS, "[%s] File over but bof_buffer not full.", id.c_str());
bof_buffer.full = true;
DeliverStream((const u_char*) "", 0);
}
analyzers.DrainModifications();
done = true;
@ -582,7 +584,7 @@ void File::FileEvent(EventHandlerPtr h, val_list* vl)
mgr.QueueEvent(h, vl);
if ( h == file_new || h == file_over_new_connection ||
h == file_mime_type ||
h == file_mime_type || h == file_mime_types ||
h == file_timeout || h == file_extraction_limit )
{
// immediate feedback is required for these events.

View file

@ -390,7 +390,7 @@ bool Manager::RemoveFile(const string& file_id)
if ( ! f )
return false;
DBG_LOG(DBG_FILE_ANALYSIS, "Remove FileID %s", file_id.c_str());
DBG_LOG(DBG_FILE_ANALYSIS, "[%s] Remove file", file_id.c_str());
f->EndOfFile();
delete f;
@ -467,8 +467,8 @@ Analyzer* Manager::InstantiateAnalyzer(Tag tag, RecordVal* args, File* f) const
return 0;
}
DBG_LOG(DBG_FILE_ANALYSIS, "Instantiate analyzer %s for file %s",
GetComponentName(tag).c_str(), f->id.c_str());
DBG_LOG(DBG_FILE_ANALYSIS, "[%s] Instantiate analyzer %s",
f->id.c_str(), GetComponentName(tag).c_str());
Analyzer* a = c->Factory()(args, f);