diff --git a/scripts/base/files/x509/main.bro b/scripts/base/files/x509/main.bro index bbf99f6a4d..44ea83fc39 100644 --- a/scripts/base/files/x509/main.bro +++ b/scripts/base/files/x509/main.bro @@ -38,6 +38,15 @@ export { event bro_init() &priority=5 { Log::create_stream(X509::LOG, [$columns=Info, $ev=log_x509, $path="x509"]); + + Files::register_for_mime_type(Files::ANALYZER_X509, "application/x-x509-user-cert"); + Files::register_for_mime_type(Files::ANALYZER_X509, "application/x-x509-ca-cert"); + # Always calculate hashes. They are not necessary for base scripts + # but very useful for identification, and required for policy scripts + Files::register_for_mime_type(Files::ANALYZER_MD5, "application/x-x509-user-cert"); + Files::register_for_mime_type(Files::ANALYZER_MD5, "application/x-x509-ca-cert"); + Files::register_for_mime_type(Files::ANALYZER_SHA1, "application/x-x509-user-cert"); + Files::register_for_mime_type(Files::ANALYZER_SHA1, "application/x-x509-ca-cert"); } redef record Files::Info += { diff --git a/scripts/base/init-bare.bro b/scripts/base/init-bare.bro index ffee527bb7..f3421fd557 100644 --- a/scripts/base/init-bare.bro +++ b/scripts/base/init-bare.bro @@ -446,6 +446,9 @@ type fa_metadata: record { mime_type: string &optional; ## All matching mime types if any were discovered. mime_types: mime_matches &optional; + ## Specifies if the mime type was inferred using signatures, + ## or provided by the protocol the file appeared in. + inferred: bool &default=T; }; ## Fields of a SYN packet. diff --git a/scripts/base/protocols/krb/files.bro b/scripts/base/protocols/krb/files.bro index cd2127c605..947c2db41a 100644 --- a/scripts/base/protocols/krb/files.bro +++ b/scripts/base/protocols/krb/files.bro @@ -101,12 +101,6 @@ event file_over_new_connection(f: fa_file, c: connection, is_orig: bool) &priori } c$krb = info; - - Files::add_analyzer(f, Files::ANALYZER_X509); - # Always calculate hashes. They are not necessary for base scripts - # but very useful for identification, and required for policy scripts - Files::add_analyzer(f, Files::ANALYZER_MD5); - Files::add_analyzer(f, Files::ANALYZER_SHA1); } function fill_in_subjects(c: connection) diff --git a/scripts/base/protocols/rdp/main.bro b/scripts/base/protocols/rdp/main.bro index c6d550c3f7..f543fd2cae 100644 --- a/scripts/base/protocols/rdp/main.bro +++ b/scripts/base/protocols/rdp/main.bro @@ -236,10 +236,6 @@ event file_over_new_connection(f: fa_file, c: connection, is_orig: bool) &priori { # Count up X509 certs. ++c$rdp$cert_count; - - Files::add_analyzer(f, Files::ANALYZER_X509); - Files::add_analyzer(f, Files::ANALYZER_MD5); - Files::add_analyzer(f, Files::ANALYZER_SHA1); } } diff --git a/scripts/base/protocols/ssl/files.bro b/scripts/base/protocols/ssl/files.bro index fad0fa0483..8a1380a270 100644 --- a/scripts/base/protocols/ssl/files.bro +++ b/scripts/base/protocols/ssl/files.bro @@ -114,12 +114,6 @@ event file_over_new_connection(f: fa_file, c: connection, is_orig: bool) &priori c$ssl$cert_chain[|c$ssl$cert_chain|] = f$info; c$ssl$cert_chain_fuids[|c$ssl$cert_chain_fuids|] = f$id; } - - Files::add_analyzer(f, Files::ANALYZER_X509); - # Always calculate hashes. They are not necessary for base scripts - # but very useful for identification, and required for policy scripts. - Files::add_analyzer(f, Files::ANALYZER_MD5); - Files::add_analyzer(f, Files::ANALYZER_SHA1); } event ssl_established(c: connection) &priority=6 diff --git a/src/analyzer/protocol/krb/krb-padata.pac b/src/analyzer/protocol/krb/krb-padata.pac index b178239f4d..4667ab1677 100644 --- a/src/analyzer/protocol/krb/krb-padata.pac +++ b/src/analyzer/protocol/krb/krb-padata.pac @@ -75,8 +75,8 @@ VectorVal* proc_padata(const KRB_PA_Data_Sequence* data, const BroAnalyzer bro_a string file_id = file_mgr->HashHandle(file_handle.Description()); file_mgr->DataIn(reinterpret_cast(cert.data()), - cert.length(), bro_analyzer->GetAnalyzerTag(), - bro_analyzer->Conn(), true, file_id); + cert.length(), bro_analyzer->GetAnalyzerTag(), + bro_analyzer->Conn(), true, file_id, "application/x-x509-user-cert"); file_mgr->EndOfFile(file_id); break; @@ -99,8 +99,8 @@ VectorVal* proc_padata(const KRB_PA_Data_Sequence* data, const BroAnalyzer bro_a string file_id = file_mgr->HashHandle(file_handle.Description()); file_mgr->DataIn(reinterpret_cast(cert.data()), - cert.length(), bro_analyzer->GetAnalyzerTag(), - bro_analyzer->Conn(), false, file_id); + cert.length(), bro_analyzer->GetAnalyzerTag(), + bro_analyzer->Conn(), true, file_id, "application/x-x509-user-cert"); file_mgr->EndOfFile(file_id); break; diff --git a/src/analyzer/protocol/rdp/rdp-analyzer.pac b/src/analyzer/protocol/rdp/rdp-analyzer.pac index 01b47e9478..1ba2c465d8 100644 --- a/src/analyzer/protocol/rdp/rdp-analyzer.pac +++ b/src/analyzer/protocol/rdp/rdp-analyzer.pac @@ -142,7 +142,7 @@ refine flow RDP_Flow += { connection()->bro_analyzer()->GetAnalyzerTag(), connection()->bro_analyzer()->Conn(), false, // It seems there are only server certs? - file_id); + file_id, "application/x-x509-user-cert"); file_mgr->EndOfFile(file_id); return true; diff --git a/src/analyzer/protocol/ssl/proc-certificate.pac b/src/analyzer/protocol/ssl/proc-certificate.pac index c2353e3a88..ee2247e5be 100644 --- a/src/analyzer/protocol/ssl/proc-certificate.pac +++ b/src/analyzer/protocol/ssl/proc-certificate.pac @@ -9,6 +9,9 @@ common.AddRaw(is_orig ? "T" : "F", 1); bro_analyzer()->Conn()->IDString(&common); + string user_mime = "application/x-x509-user-cert"; + string ca_mime = "application/x-x509-ca-cert"; + for ( unsigned int i = 0; i < certificates->size(); ++i ) { const bytestring& cert = (*certificates)[i]; @@ -21,7 +24,7 @@ file_mgr->DataIn(reinterpret_cast(cert.data()), cert.length(), bro_analyzer()->GetAnalyzerTag(), - bro_analyzer()->Conn(), is_orig, file_id); + bro_analyzer()->Conn(), is_orig, file_id, i == 0 ? user_mime : ca_mime); file_mgr->EndOfFile(file_id); } return true; diff --git a/src/file_analysis/File.cc b/src/file_analysis/File.cc index 46e67f7cd8..2d9017a338 100644 --- a/src/file_analysis/File.cc +++ b/src/file_analysis/File.cc @@ -55,6 +55,7 @@ int File::bof_buffer_size_idx = -1; int File::bof_buffer_idx = -1; int File::meta_mime_type_idx = -1; int File::meta_mime_types_idx = -1; +int File::meta_inferred_idx = -1; void File::StaticInit() { @@ -76,6 +77,7 @@ void File::StaticInit() bof_buffer_idx = Idx("bof_buffer", fa_file_type); meta_mime_type_idx = Idx("mime_type", fa_metadata_type); meta_mime_types_idx = Idx("mime_types", fa_metadata_type); + meta_inferred_idx = Idx("inferred", fa_metadata_type); } File::File(const string& file_id, const string& source_name, Connection* conn, @@ -290,6 +292,27 @@ void File::SetReassemblyBuffer(uint64 max) reassembly_max_buffer = max; } +bool File::SetMime(const string& mime_type) + { + if ( mime_type.empty() || bof_buffer.size != 0 ) + return false; + + did_metadata_inference = true; + bof_buffer.full = true; + + if ( ! FileEventAvailable(file_sniff) ) + return false; + + val_list* vl = new val_list(); + vl->append(val->Ref()); + RecordVal* meta = new RecordVal(fa_metadata_type); + vl->append(meta); + meta->Assign(meta_mime_type_idx, new StringVal(mime_type)); + meta->Assign(meta_inferred_idx, new Val(0, TYPE_BOOL)); + FileEvent(file_sniff, vl); + return true; + } + void File::InferMetadata() { did_metadata_inference = true; diff --git a/src/file_analysis/File.h b/src/file_analysis/File.h index c799907a8f..c52d9efbc4 100644 --- a/src/file_analysis/File.h +++ b/src/file_analysis/File.h @@ -171,6 +171,25 @@ public: */ void FileEvent(EventHandlerPtr h, val_list* vl); + + /** + * Sets the MIME type for a file to a specific value. + * + * Setting the MIME type has to be done before the MIME type is + * inferred from the content. After a MIME type has been set once, + * it cannot be changed anymore. + * + * This function should only be called when it does not make sense + * to perform automated MIME type detections. This is e.g. the case + * in protocols where the file type is fixed in the protocol description. + * This is for example the case for TLS and X.509 certificates. + * + * @param mime_type mime type to set + * @return true if the mime type was set. False if it could not be set because + * a mime type was already set or inferred. + */ + bool SetMime(const string& mime_type); + protected: friend class Manager; friend class FileReassembler; @@ -319,6 +338,7 @@ protected: static int bof_buffer_idx; static int mime_type_idx; static int mime_types_idx; + static int meta_inferred_idx; static int meta_mime_type_idx; static int meta_mime_types_idx; diff --git a/src/file_analysis/Manager.cc b/src/file_analysis/Manager.cc index 217c901969..3140a1e9db 100644 --- a/src/file_analysis/Manager.cc +++ b/src/file_analysis/Manager.cc @@ -110,7 +110,7 @@ void Manager::SetHandle(const string& handle) string Manager::DataIn(const u_char* data, uint64 len, uint64 offset, analyzer::Tag tag, Connection* conn, bool is_orig, - const string& precomputed_id) + const string& precomputed_id, const string& mime_type) { string id = precomputed_id.empty() ? GetFileID(tag, conn, is_orig) : precomputed_id; File* file = GetFile(id, conn, tag, is_orig); @@ -118,6 +118,9 @@ string Manager::DataIn(const u_char* data, uint64 len, uint64 offset, if ( ! file ) return ""; + if ( ! mime_type.empty() ) + file->SetMime(mime_type); + file->DataIn(data, len, offset); if ( file->IsComplete() ) @@ -130,7 +133,8 @@ string Manager::DataIn(const u_char* data, uint64 len, uint64 offset, } string Manager::DataIn(const u_char* data, uint64 len, analyzer::Tag tag, - Connection* conn, bool is_orig, const string& precomputed_id) + Connection* conn, bool is_orig, const string& precomputed_id, + const string& mime_type) { string id = precomputed_id.empty() ? GetFileID(tag, conn, is_orig) : precomputed_id; // Sequential data input shouldn't be going over multiple conns, so don't @@ -140,6 +144,9 @@ string Manager::DataIn(const u_char* data, uint64 len, analyzer::Tag tag, if ( ! file ) return ""; + if ( ! mime_type.empty() ) + file->SetMime(mime_type); + file->DataIn(data, len); if ( file->IsComplete() ) diff --git a/src/file_analysis/Manager.h b/src/file_analysis/Manager.h index bcc8ac5dd2..d4ab6c8dfc 100644 --- a/src/file_analysis/Manager.h +++ b/src/file_analysis/Manager.h @@ -93,6 +93,12 @@ public: * or false if is being sent in the opposite direction. * @param precomputed_file_id may be set to a previous return value in order to * bypass costly file handle lookups. + * @param mime_type may be set to the mime type of the file, if already known due + * to the protocol. This is, e.g., the case in TLS connections where X.509 + * certificates are passed as files; here the type of the file is set by + * the protocol. If this parameter is give, mime type detection will be + * disabled. + * This parameter is only used for the first bit of data for each file. * @return a unique file ID string which, in certain contexts, may be * cached and passed back in to a subsequent function call in order * to avoid costly file handle lookups (which have to go through @@ -101,7 +107,8 @@ public: */ std::string DataIn(const u_char* data, uint64 len, uint64 offset, analyzer::Tag tag, Connection* conn, bool is_orig, - const std::string& precomputed_file_id = ""); + const std::string& precomputed_file_id = "", + const std::string& mime_type = ""); /** * Pass in sequential file data. @@ -113,6 +120,12 @@ public: * or false if is being sent in the opposite direction. * @param precomputed_file_id may be set to a previous return value in order to * bypass costly file handle lookups. + * @param mime_type may be set to the mime type of the file, if already known due + * to the protocol. This is, e.g., the case in TLS connections where X.509 + * certificates are passed as files; here the type of the file is set by + * the protocol. If this parameter is give, mime type detection will be + * disabled. + * This parameter is only used for the first bit of data for each file. * @return a unique file ID string which, in certain contexts, may be * cached and passed back in to a subsequent function call in order * to avoid costly file handle lookups (which have to go through @@ -121,7 +134,8 @@ public: */ std::string DataIn(const u_char* data, uint64 len, analyzer::Tag tag, Connection* conn, bool is_orig, - const std::string& precomputed_file_id = ""); + const std::string& precomputed_file_id = "", + const std::string& mime_type = ""); /** * Pass in sequential file data from external source (e.g. input framework).