Lots of fixes for file type identification.

- Plain text now identified with BOMs for UTF8,16,32
   (even though 16 and 32 wouldn't get identified as plain text, oh-well)
 - X.509 certificates are now populating files.log with
   the mime type application/pkix-cert.
 - File signatures are split apart into file types
   to help group and organize signatures a bit better.
 - Normalized some FILE_ANALYSIS debug messages.
 - Improved Javascript detection.
 - Improved HTML detection.
 - Removed a bunch of bad signatures.
 - Merged a bunch of signatures that ultimately detected
   the same mime type.
 - Added detection for MS LNK files.
 - Added detection for cross-domain-policy XML files.
 - Added detection for SOAP envelopes.
This commit is contained in:
Seth Hall 2015-03-13 22:14:44 -04:00
parent c56df225b0
commit ee3e885712
14 changed files with 750 additions and 1894 deletions

View file

@ -390,7 +390,7 @@ bool Manager::RemoveFile(const string& file_id)
if ( ! f )
return false;
DBG_LOG(DBG_FILE_ANALYSIS, "Remove FileID %s", file_id.c_str());
DBG_LOG(DBG_FILE_ANALYSIS, "[%s] Remove file", file_id.c_str());
f->EndOfFile();
delete f;
@ -467,8 +467,8 @@ Analyzer* Manager::InstantiateAnalyzer(Tag tag, RecordVal* args, File* f) const
return 0;
}
DBG_LOG(DBG_FILE_ANALYSIS, "Instantiate analyzer %s for file %s",
GetComponentName(tag).c_str(), f->id.c_str());
DBG_LOG(DBG_FILE_ANALYSIS, "[%s] Instantiate analyzer %s",
f->id.c_str(), GetComponentName(tag).c_str());
Analyzer* a = c->Factory()(args, f);