diff --git a/scripts/base/frameworks/file-analysis/main.bro b/scripts/base/frameworks/file-analysis/main.bro index dbfc95ac31..e148248727 100644 --- a/scripts/base/frameworks/file-analysis/main.bro +++ b/scripts/base/frameworks/file-analysis/main.bro @@ -72,11 +72,6 @@ export { ## inspection in *bof_buffer* field. bof_buffer_size: count &log &optional; - ## A file type provided by libmagic against the *bof_buffer*, or - ## in the cases where no buffering of the beginning of file occurs, - ## an initial guess of the file type based on the first data seen. - file_type: string &log &optional; - ## A mime type provided by libmagic against the *bof_buffer*, or ## in the cases where no buffering of the beginning of file occurs, ## an initial guess of the mime type based on the first data seen. @@ -249,7 +244,6 @@ function set_info(f: fa_file) f$info$overflow_bytes = f$overflow_bytes; f$info$timeout_interval = f$timeout_interval; f$info$bof_buffer_size = f$bof_buffer_size; - if ( f?$file_type ) f$info$file_type = f$file_type; if ( f?$mime_type ) f$info$mime_type = f$mime_type; if ( f?$conns ) for ( cid in f$conns ) diff --git a/scripts/base/init-bare.bro b/scripts/base/init-bare.bro index 07433512a2..e59fa6fd72 100644 --- a/scripts/base/init-bare.bro +++ b/scripts/base/init-bare.bro @@ -374,11 +374,6 @@ type fa_file: record { ## This is also the buffer that's used for file/mime type detection. bof_buffer: string &optional; - ## A file type provided by libmagic against the *bof_buffer*, or - ## in the cases where no buffering of the beginning of file occurs, - ## an initial guess of the file type based on the first data seen. - file_type: string &optional; - ## A mime type provided by libmagic against the *bof_buffer*, or ## in the cases where no buffering of the beginning of file occurs, ## an initial guess of the mime type based on the first data seen. diff --git a/scripts/base/protocols/http/file-ident.bro b/scripts/base/protocols/http/file-ident.bro index 9996a70faa..7ed4b58a37 100644 --- a/scripts/base/protocols/http/file-ident.bro +++ b/scripts/base/protocols/http/file-ident.bro @@ -49,7 +49,7 @@ event file_new(f: fa_file) &priority=5 c$http$mime_type = f$mime_type; - local mime_str: string = split1(f$mime_type, /;/)[1]; + local mime_str: string = c$http$mime_type; if ( mime_str !in mime_types_extensions ) next; if ( ! c$http?$uri ) next; diff --git a/src/file_analysis/File.cc b/src/file_analysis/File.cc index 2da64e5c72..3e7e1d7b64 100644 --- a/src/file_analysis/File.cc +++ b/src/file_analysis/File.cc @@ -45,10 +45,8 @@ int File::overflow_bytes_idx = -1; int File::timeout_interval_idx = -1; int File::bof_buffer_size_idx = -1; int File::bof_buffer_idx = -1; -int File::file_type_idx = -1; int File::mime_type_idx = -1; -magic_t File::magic = 0; magic_t File::magic_mime = 0; string File::salt; @@ -69,10 +67,8 @@ void File::StaticInit() timeout_interval_idx = Idx("timeout_interval"); bof_buffer_size_idx = Idx("bof_buffer_size"); bof_buffer_idx = Idx("bof_buffer"); - file_type_idx = Idx("file_type"); mime_type_idx = Idx("mime_type"); - bro_init_magic(&magic, MAGIC_NONE); bro_init_magic(&magic_mime, MAGIC_MIME); salt = BifConst::FileAnalysis::salt->CheckString(); @@ -247,18 +243,22 @@ bool File::BufferBOF(const u_char* data, uint64 len) return true; } -bool File::DetectTypes(const u_char* data, uint64 len) +bool File::DetectMIME(const u_char* data, uint64 len) { - const char* desc = bro_magic_buffer(magic, data, len); const char* mime = bro_magic_buffer(magic_mime, data, len); - if ( desc ) - val->Assign(file_type_idx, new StringVal(desc)); - if ( mime ) - val->Assign(mime_type_idx, new StringVal(mime)); + { + const char* mime_end = strchr(mime, ';'); - return desc || mime; + if ( mime_end ) + // strip off charset + val->Assign(mime_type_idx, new StringVal(mime_end - mime, mime)); + else + val->Assign(mime_type_idx, new StringVal(mime)); + } + + return mime; } void File::ReplayBOF() @@ -276,7 +276,7 @@ void File::ReplayBOF() BroString* bs = concatenate(bof_buffer.chunks); val->Assign(bof_buffer_idx, new StringVal(bs)); - DetectTypes(bs->Bytes(), bs->Len()); + DetectMIME(bs->Bytes(), bs->Len()); FileEvent(file_new); @@ -291,7 +291,7 @@ void File::DataIn(const u_char* data, uint64 len, uint64 offset) if ( first_chunk ) { // TODO: this should all really be delayed until we attempt reassembly - DetectTypes(data, len); + DetectMIME(data, len); FileEvent(file_new); first_chunk = false; } @@ -326,7 +326,7 @@ void File::DataIn(const u_char* data, uint64 len) if ( missed_bof ) { - DetectTypes(data, len); + DetectMIME(data, len); FileEvent(file_new); missed_bof = false; } diff --git a/src/file_analysis/File.h b/src/file_analysis/File.h index bede666f13..2406f4a32a 100644 --- a/src/file_analysis/File.h +++ b/src/file_analysis/File.h @@ -170,11 +170,11 @@ protected: void ReplayBOF(); /** - * Does file/mime type detection and assigns types (if available) to - * corresponding fields in #val. - * @return whether a file or mime type was available. + * Does mime type detection and assigns type (if available) to \c mime_type + * field in #val. + * @return whether mime type was available. */ - bool DetectTypes(const u_char* data, uint64 len); + bool DetectMIME(const u_char* data, uint64 len); FileID id; /**< A pretty hash that likely identifies file */ string unique; /**< A string that uniquely identifies file */ @@ -207,7 +207,6 @@ protected: */ static void StaticInit(); - static magic_t magic; static magic_t magic_mime; static string salt; @@ -224,7 +223,6 @@ protected: static int timeout_interval_idx; static int bof_buffer_size_idx; static int bof_buffer_idx; - static int file_type_idx; static int mime_type_idx; }; diff --git a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.actions.data_event/out b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.actions.data_event/out index 65744f55d6..c16f2bc1e1 100644 --- a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.actions.data_event/out +++ b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.actions.data_event/out @@ -3,7 +3,6 @@ Cx92a0ym5R8, 0, 0 FILE_BOF_BUFFER ^J0.26 | 201 FILE_TYPE -file type is set mime type is set file_stream, Cx92a0ym5R8, 1500, ^J0.26 | 2012-08-24 15:10:04 -0700^J^J * Fixing update-changes, which could pick the wrong control file. (Robin Sommer)^J^J * Fixing GPG signing script. (Robin Sommer)^J^J0.25 | 2012-08-01 13:55:46 -0500^J^J * Fix configure script to exit with non-zero status on error (Jon Siwek)^J^J0.24 | 2012-07-05 12:50:43 -0700^J^J * Raise minimum required CMake version to 2.6.3 (Jon Siwek)^J^J * Adding script to delete old fully-merged branches. (Robin Sommer)^J^J0.23-2 | 2012-01-25 13:24:01 -0800^J^J * Fix a bro-cut error message. (Daniel Thayer)^J^J0.23 | 2012-01-11 12:16:11 -0800^J^J * Tweaks to release scripts, plus a new one for signing files.^J (Robin Sommer)^J^J0.22 | 2012-01-10 16:45:19 -0800^J^J * Tweaks for OpenBSD support. (Jon Siwek)^J^J * bro-cut extensions and fixes. (Robin Sommer)^J ^J - If no field names are given on the command line, we now pass through^J all fields. Adresses #657.^J^J - Removing some GNUism from awk script. Addresses #653.^J^J - Added option for time output in UTC. Addresses #668.^J^J - Added output field separator option -F. Addresses #649.^J^J - Fixing option -c: only some header lines were passed through^J rather than all. (Robin Sommer)^J^J * Fix parallel make portability. (Jon Siwek)^J^J0.21-9 | 2011-11-07 05:44:14 -0800^J^J * Fixing compiler warnings. Addresses #388. (Jon Siwek)^J^J0.21-2 | 2011-11-02 18:12:13 -0700^J^J * Fix for misnaming temp file in update-changes script. (Robin Sommer)^J^J0.21-1 | 2011-11-02 18:10:39 -0700^J^J * Little fix for make-relea file_chunk, Cx92a0ym5R8, 1500, 0, ^J0.26 | 2012-08-24 15:10:04 -0700^J^J * Fixing update-changes, which could pick the wrong control file. (Robin Sommer)^J^J * Fixing GPG signing script. (Robin Sommer)^J^J0.25 | 2012-08-01 13:55:46 -0500^J^J * Fix configure script to exit with non-zero status on error (Jon Siwek)^J^J0.24 | 2012-07-05 12:50:43 -0700^J^J * Raise minimum required CMake version to 2.6.3 (Jon Siwek)^J^J * Adding script to delete old fully-merged branches. (Robin Sommer)^J^J0.23-2 | 2012-01-25 13:24:01 -0800^J^J * Fix a bro-cut error message. (Daniel Thayer)^J^J0.23 | 2012-01-11 12:16:11 -0800^J^J * Tweaks to release scripts, plus a new one for signing files.^J (Robin Sommer)^J^J0.22 | 2012-01-10 16:45:19 -0800^J^J * Tweaks for OpenBSD support. (Jon Siwek)^J^J * bro-cut extensions and fixes. (Robin Sommer)^J ^J - If no field names are given on the command line, we now pass through^J all fields. Adresses #657.^J^J - Removing some GNUism from awk script. Addresses #653.^J^J - Added option for time output in UTC. Addresses #668.^J^J - Added output field separator option -F. Addresses #649.^J^J - Fixing option -c: only some header lines were passed through^J rather than all. (Robin Sommer)^J^J * Fix parallel make portability. (Jon Siwek)^J^J0.21-9 | 2011-11-07 05:44:14 -0800^J^J * Fixing compiler warnings. Addresses #388. (Jon Siwek)^J^J0.21-2 | 2011-11-02 18:12:13 -0700^J^J * Fix for misnaming temp file in update-changes script. (Robin Sommer)^J^J0.21-1 | 2011-11-02 18:10:39 -0700^J^J * Little fix for make-relea diff --git a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.bifs.postpone_timeout/bro..stdout b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.bifs.postpone_timeout/bro..stdout index 09b82fb655..67a56e0d89 100644 --- a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.bifs.postpone_timeout/bro..stdout +++ b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.bifs.postpone_timeout/bro..stdout @@ -1,7 +1,6 @@ FILE_NEW oDwT1BbzjM1, 0, 0 FILE_TYPE -file type is set mime type is set FILE_STATE_REMOVE oDwT1BbzjM1, 1022920, 0 @@ -11,7 +10,6 @@ source: HTTP FILE_NEW oDwT1BbzjM1, 0, 0 FILE_TYPE -file type is set mime type is set FILE_TIMEOUT FILE_TIMEOUT diff --git a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.bifs.remove_action/get.out b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.bifs.remove_action/get.out index cd7c150023..d3ba15b958 100644 --- a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.bifs.remove_action/get.out +++ b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.bifs.remove_action/get.out @@ -3,7 +3,6 @@ Cx92a0ym5R8, 0, 0 FILE_BOF_BUFFER ^J0.26 | 201 FILE_TYPE -file type is set mime type is set FILE_STATE_REMOVE Cx92a0ym5R8, 4705, 0 diff --git a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.bifs.stop/get.out b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.bifs.stop/get.out index 0c9b0151cc..3d082df87d 100644 --- a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.bifs.stop/get.out +++ b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.bifs.stop/get.out @@ -3,5 +3,4 @@ Cx92a0ym5R8, 0, 0 FILE_BOF_BUFFER ^J0.26 | 201 FILE_TYPE -file type is set mime type is set diff --git a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.ftp/out b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.ftp/out index 3bc7a26f4f..a3fa989e49 100644 --- a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.ftp/out +++ b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.ftp/out @@ -3,7 +3,6 @@ sidhzrR4IT8, 0, 0 FILE_BOF_BUFFER The Nationa FILE_TYPE -file type is set mime type is set FILE_STATE_REMOVE sidhzrR4IT8, 16557, 0 diff --git a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.get/get-gzip.out b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.get/get-gzip.out index b01f1fbf30..297edfc767 100644 --- a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.get/get-gzip.out +++ b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.get/get-gzip.out @@ -3,7 +3,6 @@ kg59rqyYxN, 0, 0 FILE_BOF_BUFFER {^J "origin FILE_TYPE -file type is set mime type is set FILE_STATE_REMOVE kg59rqyYxN, 197, 0 diff --git a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.get/get.out b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.get/get.out index 2d2abf89c6..801fd2bd6c 100644 --- a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.get/get.out +++ b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.get/get.out @@ -3,7 +3,6 @@ Cx92a0ym5R8, 0, 0 FILE_BOF_BUFFER ^J0.26 | 201 FILE_TYPE -file type is set mime type is set FILE_STATE_REMOVE Cx92a0ym5R8, 4705, 0 diff --git a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.partial-content/a.out b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.partial-content/a.out index 1d8f8ddbce..61c164c81b 100644 --- a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.partial-content/a.out +++ b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.partial-content/a.out @@ -1,7 +1,6 @@ FILE_NEW 7gZBKVUgy4l, 0, 0 FILE_TYPE -file type is set mime type is set FILE_OVER_NEW_CONNECTION FILE_STATE_REMOVE diff --git a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.partial-content/b.out b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.partial-content/b.out index 84c988158d..4d0c0a77ae 100644 --- a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.partial-content/b.out +++ b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.partial-content/b.out @@ -1,7 +1,6 @@ FILE_NEW oDwT1BbzjM1, 0, 0 FILE_TYPE -file type is set mime type is set FILE_STATE_REMOVE oDwT1BbzjM1, 1022920, 0 @@ -11,7 +10,6 @@ source: HTTP FILE_NEW oDwT1BbzjM1, 0, 0 FILE_TYPE -file type is set mime type is set FILE_TIMEOUT FILE_STATE_REMOVE diff --git a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.partial-content/c.out b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.partial-content/c.out index 53f433ba73..a0d1a21327 100644 --- a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.partial-content/c.out +++ b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.partial-content/c.out @@ -1,7 +1,6 @@ FILE_NEW uHS14uhRKGe, 0, 0 FILE_TYPE -file type is set mime type is set FILE_OVER_NEW_CONNECTION FILE_STATE_REMOVE diff --git a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.pipeline/out b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.pipeline/out index 28f3a5de04..ba2c318fa1 100644 --- a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.pipeline/out +++ b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.pipeline/out @@ -3,7 +3,6 @@ aFQKI8SPOL2, 0, 0 FILE_BOF_BUFFER /*^J******** FILE_TYPE -file type is set mime type is set FILE_STATE_REMOVE aFQKI8SPOL2, 2675, 0 @@ -17,7 +16,6 @@ CCU3vUEr06l, 0, 0 FILE_BOF_BUFFER //-- Google FILE_TYPE -file type is set mime type is set FILE_STATE_REMOVE CCU3vUEr06l, 21421, 0 @@ -31,7 +29,6 @@ HCzA0dVwDPj, 0, 0 FILE_BOF_BUFFER GIF89a^D\0^D\0\xb3 FILE_TYPE -file type is set mime type is set FILE_STATE_REMOVE HCzA0dVwDPj, 94, 0 @@ -46,7 +43,6 @@ a1Zu1fteVEf, 0, 0 FILE_BOF_BUFFER \x89PNG^M^J^Z^J\0\0\0 FILE_TYPE -file type is set mime type is set FILE_STATE_REMOVE a1Zu1fteVEf, 2349, 0 @@ -61,7 +57,6 @@ xXlF7wFdsR, 0, 0 FILE_BOF_BUFFER \x89PNG^M^J^Z^J\0\0\0 FILE_TYPE -file type is set mime type is set FILE_STATE_REMOVE xXlF7wFdsR, 27579, 0 diff --git a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.post/out b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.post/out index ac249fd253..3f0146eea7 100644 --- a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.post/out +++ b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.post/out @@ -3,7 +3,6 @@ v5HLI7MxPQh, 0, 0 FILE_BOF_BUFFER hello world FILE_TYPE -file type is set mime type is set FILE_STATE_REMOVE v5HLI7MxPQh, 11, 0 @@ -18,7 +17,6 @@ PZS1XGHkIf1, 0, 0 FILE_BOF_BUFFER {^J "origin FILE_TYPE -file type is set mime type is set FILE_STATE_REMOVE PZS1XGHkIf1, 366, 0 diff --git a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.input.basic/bro..stdout b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.input.basic/bro..stdout index 2cae5a3f22..d3845e39db 100644 --- a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.input.basic/bro..stdout +++ b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.input.basic/bro..stdout @@ -3,7 +3,6 @@ nYgPNGLrZf9, 0, 0 FILE_BOF_BUFFER #separator FILE_TYPE -file type is set mime type is set FILE_STATE_REMOVE nYgPNGLrZf9, 311, 0 diff --git a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.irc/out b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.irc/out index aa6384f82a..ce5fd67778 100644 --- a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.irc/out +++ b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.irc/out @@ -3,7 +3,6 @@ wqKMAamJVSb, 0, 0 FILE_BOF_BUFFER PK^C^D^T\0\0\0^H\0\xae FILE_TYPE -file type is set mime type is set FILE_STATE_REMOVE wqKMAamJVSb, 42208, 0 diff --git a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.logging/file_analysis.log b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.logging/file_analysis.log index cf0c223a5b..8e04fefa81 100644 --- a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.logging/file_analysis.log +++ b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.logging/file_analysis.log @@ -3,8 +3,8 @@ #empty_field (empty) #unset_field - #path file_analysis -#open 2013-04-04-21-22-26 -#fields id parent_id source last_active seen_bytes total_bytes missing_bytes overflow_bytes timeout_interval bof_buffer_size file_type mime_type timedout conn_uids actions_taken extracted_files md5 sha1 sha256 -#types string string string time count count count count interval count string string bool table[string] table[enum] table[string] string string string -Cx92a0ym5R8 - HTTP 1362692527.009775 4705 4705 0 0 120.000000 1024 set set F UWkUyAuUGXf FileAnalysis::ACTION_SHA1,FileAnalysis::ACTION_EXTRACT,FileAnalysis::ACTION_DATA_EVENT,FileAnalysis::ACTION_MD5,FileAnalysis::ACTION_SHA256 Cx92a0ym5R8-file 397168fd09991a0e712254df7bc639ac 1dd7ac0398df6cbc0696445a91ec681facf4dc47 4e7c7ef0984119447e743e3ec77e1de52713e345cde03fe7df753a35849bed18 -#close 2013-04-04-21-22-26 +#open 2013-04-11-17-29-51 +#fields id parent_id source last_active seen_bytes total_bytes missing_bytes overflow_bytes timeout_interval bof_buffer_size mime_type timedout conn_uids actions_taken extracted_files md5 sha1 sha256 +#types string string string time count count count count interval count string bool table[string] table[enum] table[string] string string string +Cx92a0ym5R8 - HTTP 1362692527.009775 4705 4705 0 0 120.000000 1024 set F UWkUyAuUGXf FileAnalysis::ACTION_SHA1,FileAnalysis::ACTION_EXTRACT,FileAnalysis::ACTION_DATA_EVENT,FileAnalysis::ACTION_MD5,FileAnalysis::ACTION_SHA256 Cx92a0ym5R8-file 397168fd09991a0e712254df7bc639ac 1dd7ac0398df6cbc0696445a91ec681facf4dc47 4e7c7ef0984119447e743e3ec77e1de52713e345cde03fe7df753a35849bed18 +#close 2013-04-11-17-29-51 diff --git a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.smtp/out b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.smtp/out index 27e9c42c5b..188b010b35 100644 --- a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.smtp/out +++ b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.smtp/out @@ -3,7 +3,6 @@ cwR7l6Zctxb, 0, 0 FILE_BOF_BUFFER Hello^M^J^M^J ^M FILE_TYPE -file type is set mime type is set FILE_STATE_REMOVE cwR7l6Zctxb, 79, 0 @@ -17,7 +16,6 @@ ZAOEQmRyxv1, 0, 0 FILE_BOF_BUFFER