From 9fd78165018aeb036787dc788b10daa0f5eb73fc Mon Sep 17 00:00:00 2001 From: Johanna Amann Date: Fri, 10 Feb 2017 12:34:23 -0800 Subject: [PATCH] Allow File analyzers to directly pass mime type. This makes it much easier for protocols where the mime type is known in advance like, for example, TLS. We no longer have to perform deep script-level magic. --- scripts/base/files/x509/main.bro | 9 ++++++++ scripts/base/init-bare.bro | 3 +++ scripts/base/protocols/krb/files.bro | 6 ----- scripts/base/protocols/rdp/main.bro | 4 ---- scripts/base/protocols/ssl/files.bro | 6 ----- src/analyzer/protocol/krb/krb-padata.pac | 8 +++---- src/analyzer/protocol/rdp/rdp-analyzer.pac | 2 +- .../protocol/ssl/proc-certificate.pac | 5 +++- src/file_analysis/File.cc | 23 +++++++++++++++++++ src/file_analysis/File.h | 20 ++++++++++++++++ src/file_analysis/Manager.cc | 11 +++++++-- src/file_analysis/Manager.h | 18 +++++++++++++-- 12 files changed, 89 insertions(+), 26 deletions(-) diff --git a/scripts/base/files/x509/main.bro b/scripts/base/files/x509/main.bro index bbf99f6a4d..44ea83fc39 100644 --- a/scripts/base/files/x509/main.bro +++ b/scripts/base/files/x509/main.bro @@ -38,6 +38,15 @@ export { event bro_init() &priority=5 { Log::create_stream(X509::LOG, [$columns=Info, $ev=log_x509, $path="x509"]); + + Files::register_for_mime_type(Files::ANALYZER_X509, "application/x-x509-user-cert"); + Files::register_for_mime_type(Files::ANALYZER_X509, "application/x-x509-ca-cert"); + # Always calculate hashes.
They are not necessary for base scripts + # but very useful for identification, and required for policy scripts + Files::register_for_mime_type(Files::ANALYZER_MD5, "application/x-x509-user-cert"); + Files::register_for_mime_type(Files::ANALYZER_MD5, "application/x-x509-ca-cert"); + Files::register_for_mime_type(Files::ANALYZER_SHA1, "application/x-x509-user-cert"); + Files::register_for_mime_type(Files::ANALYZER_SHA1, "application/x-x509-ca-cert"); } redef record Files::Info += { diff --git a/scripts/base/init-bare.bro b/scripts/base/init-bare.bro index ffee527bb7..f3421fd557 100644 --- a/scripts/base/init-bare.bro +++ b/scripts/base/init-bare.bro @@ -446,6 +446,9 @@ type fa_metadata: record { mime_type: string &optional; ## All matching mime types if any were discovered. mime_types: mime_matches &optional; + ## Specifies if the mime type was inferred using signatures, + ## or provided by the protocol the file appeared in. + inferred: bool &default=T; }; ## Fields of a SYN packet. diff --git a/scripts/base/protocols/krb/files.bro b/scripts/base/protocols/krb/files.bro index cd2127c605..947c2db41a 100644 --- a/scripts/base/protocols/krb/files.bro +++ b/scripts/base/protocols/krb/files.bro @@ -101,12 +101,6 @@ event file_over_new_connection(f: fa_file, c: connection, is_orig: bool) &priori } c$krb = info; - - Files::add_analyzer(f, Files::ANALYZER_X509); - # Always calculate hashes. They are not necessary for base scripts - # but very useful for identification, and required for policy scripts - Files::add_analyzer(f, Files::ANALYZER_MD5); - Files::add_analyzer(f, Files::ANALYZER_SHA1); } function fill_in_subjects(c: connection) diff --git a/scripts/base/protocols/rdp/main.bro b/scripts/base/protocols/rdp/main.bro index c6d550c3f7..f543fd2cae 100644 --- a/scripts/base/protocols/rdp/main.bro +++ b/scripts/base/protocols/rdp/main.bro @@ -236,10 +236,6 @@ event file_over_new_connection(f: fa_file, c: connection, is_orig: bool) &priori { # Count up X509 certs. 
++c$rdp$cert_count; - - Files::add_analyzer(f, Files::ANALYZER_X509); - Files::add_analyzer(f, Files::ANALYZER_MD5); - Files::add_analyzer(f, Files::ANALYZER_SHA1); } } diff --git a/scripts/base/protocols/ssl/files.bro b/scripts/base/protocols/ssl/files.bro index fad0fa0483..8a1380a270 100644 --- a/scripts/base/protocols/ssl/files.bro +++ b/scripts/base/protocols/ssl/files.bro @@ -114,12 +114,6 @@ event file_over_new_connection(f: fa_file, c: connection, is_orig: bool) &priori c$ssl$cert_chain[|c$ssl$cert_chain|] = f$info; c$ssl$cert_chain_fuids[|c$ssl$cert_chain_fuids|] = f$id; } - - Files::add_analyzer(f, Files::ANALYZER_X509); - # Always calculate hashes. They are not necessary for base scripts - # but very useful for identification, and required for policy scripts. - Files::add_analyzer(f, Files::ANALYZER_MD5); - Files::add_analyzer(f, Files::ANALYZER_SHA1); } event ssl_established(c: connection) &priority=6 diff --git a/src/analyzer/protocol/krb/krb-padata.pac b/src/analyzer/protocol/krb/krb-padata.pac index b178239f4d..4667ab1677 100644 --- a/src/analyzer/protocol/krb/krb-padata.pac +++ b/src/analyzer/protocol/krb/krb-padata.pac @@ -75,8 +75,8 @@ VectorVal* proc_padata(const KRB_PA_Data_Sequence* data, const BroAnalyzer bro_a string file_id = file_mgr->HashHandle(file_handle.Description()); file_mgr->DataIn(reinterpret_cast<const u_char*>(cert.data()), - cert.length(), bro_analyzer->GetAnalyzerTag(), - bro_analyzer->Conn(), true, file_id); + cert.length(), bro_analyzer->GetAnalyzerTag(), + bro_analyzer->Conn(), true, file_id, "application/x-x509-user-cert"); file_mgr->EndOfFile(file_id); break; @@ -99,8 +99,8 @@ VectorVal* proc_padata(const KRB_PA_Data_Sequence* data, const BroAnalyzer bro_a string file_id = file_mgr->HashHandle(file_handle.Description()); file_mgr->DataIn(reinterpret_cast<const u_char*>(cert.data()), - cert.length(), bro_analyzer->GetAnalyzerTag(), - bro_analyzer->Conn(), false, file_id); + cert.length(), bro_analyzer->GetAnalyzerTag(), + bro_analyzer->Conn(), false,
file_id, "application/x-x509-user-cert"); file_mgr->EndOfFile(file_id); break; diff --git a/src/analyzer/protocol/rdp/rdp-analyzer.pac b/src/analyzer/protocol/rdp/rdp-analyzer.pac index 01b47e9478..1ba2c465d8 100644 --- a/src/analyzer/protocol/rdp/rdp-analyzer.pac +++ b/src/analyzer/protocol/rdp/rdp-analyzer.pac @@ -142,7 +142,7 @@ refine flow RDP_Flow += { connection()->bro_analyzer()->GetAnalyzerTag(), connection()->bro_analyzer()->Conn(), false, // It seems there are only server certs? - file_id); + file_id, "application/x-x509-user-cert"); file_mgr->EndOfFile(file_id); return true; diff --git a/src/analyzer/protocol/ssl/proc-certificate.pac b/src/analyzer/protocol/ssl/proc-certificate.pac index c2353e3a88..ee2247e5be 100644 --- a/src/analyzer/protocol/ssl/proc-certificate.pac +++ b/src/analyzer/protocol/ssl/proc-certificate.pac @@ -9,6 +9,9 @@ common.AddRaw(is_orig ? "T" : "F", 1); bro_analyzer()->Conn()->IDString(&common); + string user_mime = "application/x-x509-user-cert"; + string ca_mime = "application/x-x509-ca-cert"; + for ( unsigned int i = 0; i < certificates->size(); ++i ) { const bytestring& cert = (*certificates)[i]; @@ -21,7 +24,7 @@ file_mgr->DataIn(reinterpret_cast<const u_char*>(cert.data()), cert.length(), bro_analyzer()->GetAnalyzerTag(), - bro_analyzer()->Conn(), is_orig, file_id); + bro_analyzer()->Conn(), is_orig, file_id, i == 0 ?
user_mime : ca_mime); file_mgr->EndOfFile(file_id); } return true; diff --git a/src/file_analysis/File.cc b/src/file_analysis/File.cc index 46e67f7cd8..2d9017a338 100644 --- a/src/file_analysis/File.cc +++ b/src/file_analysis/File.cc @@ -55,6 +55,7 @@ int File::bof_buffer_size_idx = -1; int File::bof_buffer_idx = -1; int File::meta_mime_type_idx = -1; int File::meta_mime_types_idx = -1; +int File::meta_inferred_idx = -1; void File::StaticInit() { @@ -76,6 +77,7 @@ void File::StaticInit() bof_buffer_idx = Idx("bof_buffer", fa_file_type); meta_mime_type_idx = Idx("mime_type", fa_metadata_type); meta_mime_types_idx = Idx("mime_types", fa_metadata_type); + meta_inferred_idx = Idx("inferred", fa_metadata_type); } File::File(const string& file_id, const string& source_name, Connection* conn, @@ -290,6 +292,27 @@ void File::SetReassemblyBuffer(uint64 max) reassembly_max_buffer = max; } +bool File::SetMime(const string& mime_type) + { + if ( mime_type.empty() || bof_buffer.size != 0 ) + return false; + + did_metadata_inference = true; + bof_buffer.full = true; + + if ( ! FileEventAvailable(file_sniff) ) + return false; + + val_list* vl = new val_list(); + vl->append(val->Ref()); + RecordVal* meta = new RecordVal(fa_metadata_type); + vl->append(meta); + meta->Assign(meta_mime_type_idx, new StringVal(mime_type)); + meta->Assign(meta_inferred_idx, new Val(0, TYPE_BOOL)); + FileEvent(file_sniff, vl); + return true; + } + void File::InferMetadata() { did_metadata_inference = true; diff --git a/src/file_analysis/File.h b/src/file_analysis/File.h index c799907a8f..c52d9efbc4 100644 --- a/src/file_analysis/File.h +++ b/src/file_analysis/File.h @@ -171,6 +171,25 @@ public: */ void FileEvent(EventHandlerPtr h, val_list* vl); + + /** + * Sets the MIME type for a file to a specific value. + * + * Setting the MIME type has to be done before the MIME type is + * inferred from the content. After a MIME type has been set once, + * it cannot be changed anymore. 
+ * + * This function should only be called when it does not make sense + * to perform automated MIME type detections. This is e.g. the case + * in protocols where the file type is fixed in the protocol description. + * This is for example the case for TLS and X.509 certificates. + * + * @param mime_type mime type to set + * @return true if the mime type was set. False if it could not be set because + * a mime type was already set or inferred. + */ + bool SetMime(const string& mime_type); + protected: friend class Manager; friend class FileReassembler; @@ -319,6 +338,7 @@ protected: static int bof_buffer_idx; static int mime_type_idx; static int mime_types_idx; + static int meta_inferred_idx; static int meta_mime_type_idx; static int meta_mime_types_idx; diff --git a/src/file_analysis/Manager.cc b/src/file_analysis/Manager.cc index 217c901969..3140a1e9db 100644 --- a/src/file_analysis/Manager.cc +++ b/src/file_analysis/Manager.cc @@ -110,7 +110,7 @@ void Manager::SetHandle(const string& handle) string Manager::DataIn(const u_char* data, uint64 len, uint64 offset, analyzer::Tag tag, Connection* conn, bool is_orig, - const string& precomputed_id) + const string& precomputed_id, const string& mime_type) { string id = precomputed_id.empty() ? GetFileID(tag, conn, is_orig) : precomputed_id; File* file = GetFile(id, conn, tag, is_orig); @@ -118,6 +118,9 @@ string Manager::DataIn(const u_char* data, uint64 len, uint64 offset, if ( ! file ) return ""; + if ( ! mime_type.empty() ) + file->SetMime(mime_type); + file->DataIn(data, len, offset); if ( file->IsComplete() ) @@ -130,7 +133,8 @@ string Manager::DataIn(const u_char* data, uint64 len, uint64 offset, } string Manager::DataIn(const u_char* data, uint64 len, analyzer::Tag tag, - Connection* conn, bool is_orig, const string& precomputed_id) + Connection* conn, bool is_orig, const string& precomputed_id, + const string& mime_type) { string id = precomputed_id.empty() ? 
GetFileID(tag, conn, is_orig) : precomputed_id; // Sequential data input shouldn't be going over multiple conns, so don't @@ -140,6 +144,9 @@ string Manager::DataIn(const u_char* data, uint64 len, analyzer::Tag tag, if ( ! file ) return ""; + if ( ! mime_type.empty() ) + file->SetMime(mime_type); + file->DataIn(data, len); if ( file->IsComplete() ) diff --git a/src/file_analysis/Manager.h b/src/file_analysis/Manager.h index bcc8ac5dd2..d4ab6c8dfc 100644 --- a/src/file_analysis/Manager.h +++ b/src/file_analysis/Manager.h @@ -93,6 +93,12 @@ public: * or false if is being sent in the opposite direction. * @param precomputed_file_id may be set to a previous return value in order to * bypass costly file handle lookups. + * @param mime_type may be set to the mime type of the file, if already known due + * to the protocol. This is, e.g., the case in TLS connections where X.509 + * certificates are passed as files; here the type of the file is set by + * the protocol. If this parameter is given, mime type detection will be + * disabled. + * This parameter is only used for the first bit of data for each file. * @return a unique file ID string which, in certain contexts, may be * cached and passed back in to a subsequent function call in order * to avoid costly file handle lookups (which have to go through @@ -101,7 +107,8 @@ public: */ std::string DataIn(const u_char* data, uint64 len, uint64 offset, analyzer::Tag tag, Connection* conn, bool is_orig, - const std::string& precomputed_file_id = ""); + const std::string& precomputed_file_id = "", + const std::string& mime_type = ""); /** * Pass in sequential file data. @@ -113,6 +120,12 @@ public: * or false if is being sent in the opposite direction. * @param precomputed_file_id may be set to a previous return value in order to * bypass costly file handle lookups. + * @param mime_type may be set to the mime type of the file, if already known due + * to the protocol.
This is, e.g., the case in TLS connections where X.509 + * certificates are passed as files; here the type of the file is set by + * the protocol. If this parameter is given, mime type detection will be + * disabled. + * This parameter is only used for the first bit of data for each file. * @return a unique file ID string which, in certain contexts, may be * cached and passed back in to a subsequent function call in order * to avoid costly file handle lookups (which have to go through @@ -121,7 +134,8 @@ public: */ std::string DataIn(const u_char* data, uint64 len, analyzer::Tag tag, Connection* conn, bool is_orig, - const std::string& precomputed_file_id = ""); + const std::string& precomputed_file_id = "", + const std::string& mime_type = ""); /** * Pass in sequential file data from external source (e.g. input framework).