diff --git a/CHANGES b/CHANGES index 647c97be22..7ea0ab62b9 100644 --- a/CHANGES +++ b/CHANGES @@ -1,4 +1,29 @@ +2.3-741 | 2015-04-20 13:12:39 -0700 + + * API changes to file analysis mime type detection. Removed + "file_mime_type" and "file_mime_types" events, replacing them with + a new event called "file_metadata_inferred". Addresses BIT-1368. + (Jon Siwek) + + * A large series of improvements for file type identification. This + includes many signature updates (new types, cleanup, performance + improvements) and splitting out signatures into subfiles. (Seth + Hall) + + * Fix an issue with files having gaps before the bof_buffer is + filled, which could lead to file type identification not working + correctly. (Seth Hall) + + * Fix an issue with packet loss in HTTP file reporting, where file type + identification wasn't working correctly for zero-length bodies. (Seth + Hall) + + * X.509 certificates are now populating files.log with the mime type + application/pkix-cert. (Seth Hall) + + * Normalized some FILE_ANALYSIS debug messages. (Seth Hall) + 2.3-725 | 2015-04-20 12:54:54 -0700 * Updating submodule(s). diff --git a/NEWS b/NEWS index 93ffbea5a3..b61f464a81 100644 --- a/NEWS +++ b/NEWS @@ -35,7 +35,10 @@ New Functionality failed or succeeded in most circumstances. - Bro's file analysis now supports reassembly of files that are not - transferred/seen sequentially. + transferred/seen sequentially. The default file reassembly buffer + size is set with the ``Files::reassembly_buffer_size`` variable. + +- Bro's file type identification has been greatly improved. - Bro's scripting language now has a ``while`` statement:: @@ -114,14 +117,17 @@ Changed Functionality - File analysis * Removed ``fa_file`` record's ``mime_type`` and ``mime_types`` - fields. The events ``file_mime_type`` and ``file_mime_types`` - have been added which contain the same information. The - ``mime_type`` field of ``Files::Info`` also still has this info. + fields. 
The event ``file_sniff`` has been added which provides + the same information. The ``mime_type`` field of ``Files::Info`` + also still has this info. * The earliest point that new mime type information is available is - in the ``file_mime_type`` event which comes after the ``file_new`` - and ``file_over_new_connection`` events. Scripts which inspected - mime type info within those events will need to be adapted. + in the ``file_sniff`` event which comes after the ``file_new`` and + ``file_over_new_connection`` events. Scripts which inspected mime + type info within those events will need to be adapted. (Note: for + users that worked w/ versions of Bro from git, for a while there was + also an event called ``file_mime_type`` which is now replaced with + the ``file_sniff`` event). * Removed ``Files::add_analyzers_for_mime_type`` function. diff --git a/VERSION b/VERSION index 91a8f8b8c7..c96bd4ae14 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -2.3-725 +2.3-741 diff --git a/doc/frameworks/file_analysis_02.bro b/doc/frameworks/file_analysis_02.bro index 141b11fca6..fd4f0e775e 100644 --- a/doc/frameworks/file_analysis_02.bro +++ b/doc/frameworks/file_analysis_02.bro @@ -1,7 +1,8 @@ -event file_mime_type(f: fa_file, mime_type: string) +event file_sniff(f: fa_file, meta: fa_metadata) { + if ( ! meta?$mime_type ) return; print "new file", f$id; - if ( mime_type == "text/plain" ) + if ( meta$mime_type == "text/plain" ) Files::add_analyzer(f, Files::ANALYZER_MD5); } diff --git a/doc/httpmonitor/file_extraction.bro b/doc/httpmonitor/file_extraction.bro index 3860cb361e..c387156b62 100644 --- a/doc/httpmonitor/file_extraction.bro +++ b/doc/httpmonitor/file_extraction.bro @@ -7,15 +7,18 @@ global mime_to_ext: table[string] of string = { ["text/html"] = "html", }; -event file_mime_type(f: fa_file, mime_type: string) +event file_sniff(f: fa_file, meta: fa_metadata) { if ( f$source != "HTTP" ) return; - if ( mime_type !in mime_to_ext ) + if ( ! 
meta?$mime_type ) return; - local fname = fmt("%s-%s.%s", f$source, f$id, mime_to_ext[mime_type]); + if ( meta$mime_type !in mime_to_ext ) + return; + + local fname = fmt("%s-%s.%s", f$source, f$id, mime_to_ext[meta$mime_type]); print fmt("Extracting file %s", fname); Files::add_analyzer(f, Files::ANALYZER_EXTRACT, [$extract_filename=fname]); - } \ No newline at end of file + } diff --git a/scripts/base/files/x509/main.bro b/scripts/base/files/x509/main.bro index a810132f8d..c097b84560 100644 --- a/scripts/base/files/x509/main.bro +++ b/scripts/base/files/x509/main.bro @@ -47,6 +47,9 @@ redef record Files::Info += { event x509_certificate(f: fa_file, cert_ref: opaque of x509, cert: X509::Certificate) &priority=5 { + if ( ! f$info?$mime_type ) + f$info$mime_type = "application/pkix-cert"; + f$info$x509 = [$ts=f$info$ts, $id=f$id, $certificate=cert, $handle=cert_ref]; } diff --git a/scripts/base/frameworks/files/magic/__load__.bro b/scripts/base/frameworks/files/magic/__load__.bro index c6ee799a53..34115f0a55 100644 --- a/scripts/base/frameworks/files/magic/__load__.bro +++ b/scripts/base/frameworks/files/magic/__load__.bro @@ -1,3 +1,9 @@ +@load-sigs ./archive +@load-sigs ./audio +@load-sigs ./font @load-sigs ./general +@load-sigs ./image @load-sigs ./msoffice -@load-sigs ./libmagic +@load-sigs ./video + +@load-sigs ./libmagic \ No newline at end of file diff --git a/scripts/base/frameworks/files/magic/archive.sig b/scripts/base/frameworks/files/magic/archive.sig new file mode 100644 index 0000000000..9b95f33b25 --- /dev/null +++ b/scripts/base/frameworks/files/magic/archive.sig @@ -0,0 +1,176 @@ + +signature file-tar { + file-magic /^[[:print:]\x00]{100}([[:digit:]\x20]{7}\x00){3}([[:digit:]\x20]{11}\x00){2}([[:digit:]\x00\x20]{7}[\x20\x00])[0-7\x00]/ + file-mime "application/x-tar", 100 +} + +# This is low priority so that files using zip as a +# container will be identified correctly. 
+signature file-zip { + file-mime "application/zip", 10 + file-magic /^PK\x03\x04.{2}/ +} + +# Multivolume Zip archive +signature file-multi-zip { + file-mime "application/zip", 10 + file-magic /^PK\x07\x08PK\x03\x04/ +} + +# RAR +signature file-rar { + file-mime "application/x-rar", 70 + file-magic /^Rar!/ +} + +# GZIP (two-byte magic 1f 8b at the very start of the file) +signature file-gzip { + file-mime "application/x-gzip", 100 + file-magic /^\x1f\x8b/ +} + +# Microsoft Cabinet +signature file-ms-cab { + file-mime "application/vnd.ms-cab-compressed", 110 + file-magic /^MSCF\x00\x00\x00\x00/ +} + +# Mac OS X DMG files +signature file-dmg { + file-magic /^(\x78\x01\x73\x0D\x62\x62\x60|\x78\xDA\x63\x60\x18\x05|\x78\x01\x63\x60\x18\x05|\x78\xDA\x73\x0D|\x78[\x01\xDA]\xED[\xD0-\xD9])/ + file-mime "application/x-dmg", 100 +} + +# XAR (eXtensible ARchive) format. +# Mac OS X uses this for the .pkg format. +signature file-xar { + file-magic /^xar\!/ + file-mime "application/x-xar", 100 +} + +# RPM +signature file-magic-auto352 { + file-mime "application/x-rpm", 70 + file-magic /^(drpm|\xed\xab\xee\xdb)/ +} + +# StuffIt +signature file-stuffit { + file-mime "application/x-stuffit", 70 + file-magic /^(SIT\x21|StuffIt)/ +} + +# Archived data (Unix ar archives: literal "<arch>" magic, leading "!" optional) +signature file-x-archive { + file-mime "application/x-archive", 70 + file-magic /^!?<arch>/ +} + +# ARC archive data +signature file-arc { + file-mime "application/x-arc", 70 + file-magic /^[\x00-\x7f]{2}[\x02-\x0a\x14\x48]\x1a/ +} + +# EET archive +signature file-eet { + file-mime "application/x-eet", 70 + file-magic /^\x1e\xe7\xff\x00/ +} + +# Zoo archive +signature file-zoo { + file-mime "application/x-zoo", 70 + file-magic /^.{20}\xdc\xa7\xc4\xfd/ +} + +# LZ4 compressed data (legacy format; magic anchored at file start like the other entries) +signature file-lz4-legacy { + file-mime "application/x-lz4", 70 + file-magic /^(\x02\x21\x4c\x18)/ +} + +# LZ4 compressed data +signature file-lz4 { + file-mime "application/x-lz4", 70 + file-magic /^\x04\x22\x4d\x18/ +} + +# LRZIP compressed data +signature file-lrzip { + file-mime 
"application/x-lrzip", 1 + file-magic /^LRZI/ +} + +# LZIP compressed data +signature file-lzip { + file-mime "application/x-lzip", 70 + file-magic /^LZIP/ +} + +# Self-extracting PKZIP archive +signature file-magic-auto434 { + file-mime "application/zip", 340 + file-magic /^MZ.{28}(Copyright 1989\x2d1990 PKWARE Inc|PKLITE Copr)\x2e/ +} + +# LHA archive (LZH) +signature file-lzh { + file-mime "application/x-lzh", 80 + file-magic /^.{2}-(lh[ abcdex0-9]|lz[s2-8]|lz[s2-8]|pm[s012]|pc1)-/ +} + +# WARC Archive +signature file-warc { + file-mime "application/warc", 50 + file-magic /^WARC\x2f/ +} + +# 7-zip archive data +signature file-7zip { + file-mime "application/x-7z-compressed", 50 + file-magic /^7z\xbc\xaf\x27\x1c/ +} + +# XZ compressed data +signature file-xz { + file-mime "application/x-xz", 90 + file-magic /^\xfd7zXZ\x00/ +} + +# LHa self-extracting archive +signature file-magic-auto436 { + file-mime "application/x-lha", 120 + file-magic /^MZ.{34}LH[aA]\x27s SFX/ +} + +# ARJ archive data +signature file-arj { + file-mime "application/x-arj", 50 + file-magic /^\x60\xea/ +} + +# Byte-swapped cpio archive +signature file-bs-cpio { + file-mime "application/x-cpio", 50 + file-magic /(\x71\xc7|\xc7\x71)/ +} + +# CPIO archive +signature file-cpio { + file-mime "application/x-cpio", 50 + file-magic /^(\xc7\x71|\x71\xc7)/ +} + +# Compress'd data +signature file-compress { + file-mime "application/x-compress", 50 + file-magic /^\x1f\x9d/ +} + +# LZMA compressed data +signature file-lzma { + file-mime "application/x-lzma", 71 + file-magic /^\x5d\x00\x00/ +} + diff --git a/scripts/base/frameworks/files/magic/audio.sig b/scripts/base/frameworks/files/magic/audio.sig new file mode 100644 index 0000000000..efba99ed0d --- /dev/null +++ b/scripts/base/frameworks/files/magic/audio.sig @@ -0,0 +1,13 @@ + +# MPEG v3 audio +signature file-mpeg-audio { + file-mime "audio/mpeg", 20 + file-magic /^\xff[\xe2\xe3\xf2\xf3\xf6\xf7\xfa\xfb\xfc\xfd]/ +} + +# MPEG v4 audio +signature file-m4a 
{ + file-mime "audio/m4a", 70 + file-magic /^....ftyp(m4a)/ +} + diff --git a/scripts/base/frameworks/files/magic/font.sig b/scripts/base/frameworks/files/magic/font.sig new file mode 100644 index 0000000000..8f2857f6e3 --- /dev/null +++ b/scripts/base/frameworks/files/magic/font.sig @@ -0,0 +1,41 @@ + +# Web Open Font Format +signature file-woff { + file-magic /^wOFF/ + file-mime "application/font-woff", 70 +} + +# TrueType font +signature file-ttf { + file-mime "application/x-font-ttf", 80 + file-magic /^\x00\x01\x00\x00\x00/ +} + +signature file-embedded-opentype { + file-mime "application/vnd.ms-fontobject", 50 + file-magic /^.{34}LP/ +} + +# X11 SNF font +signature file-snf { + file-mime "application/x-font-sfn", 70 + file-magic /^(\x04\x00\x00\x00|\x00\x00\x00\x04).{100}(\x04\x00\x00\x00|\x00\x00\x00\x04)/ +} + +# OpenType font +signature file-opentype { + file-mime "application/vnd.ms-opentype", 70 + file-magic /^OTTO/ +} + +# FrameMaker Font file +signature file-maker-screen-font { + file-mime "application/x-mif", 190 + file-magic /^\x3cMakerScreenFont/ +} + +# >0 string,=SplineFontDB: (len=13), ["Spline Font Database "], swap_endian=0 +signature file-spline-font-db { + file-mime "application/vnd.font-fontforge-sfd", 160 + file-magic /^SplineFontDB\x3a/ +} diff --git a/scripts/base/frameworks/files/magic/general.sig b/scripts/base/frameworks/files/magic/general.sig index 500c4f7be0..eb38d39c8c 100644 --- a/scripts/base/frameworks/files/magic/general.sig +++ b/scripts/base/frameworks/files/magic/general.sig @@ -1,18 +1,87 @@ # General purpose file magic signatures. 
+# Plaintext +# (Including BOMs for UTF-8, 16, and 32) signature file-plaintext { - file-magic /^([[:print:][:space:]]{10})/ - file-mime "text/plain", -20 + file-mime "text/plain", -20 + file-magic /^(\xef\xbb\xbf|(\x00\x00)?\xfe\xff|\xff\xfe(\x00\x00)?)?[[:space:]\x20-\x7E]{10}/ } -signature file-tar { - file-magic /^[[:print:]\x00]{100}([[:digit:]\x20]{7}\x00){3}([[:digit:]\x20]{11}\x00){2}([[:digit:]\x00\x20]{7}[\x20\x00])[0-7\x00]/ - file-mime "application/x-tar", 100 +signature file-json { + file-mime "text/json", 1 + file-magic /^(\xef\xbb\xbf)?[\x0d\x0a[:blank:]]*\{[\x0d\x0a[:blank:]]*(["][^"]{1,}["]|[a-zA-Z][a-zA-Z0-9\\_]*)[\x0d\x0a[:blank:]]*:[\x0d\x0a[:blank:]]*(["]|\[|\{|[0-9]|true|false)/ } -signature file-zip { - file-mime "application/zip", 10 - file-magic /^PK\x03\x04.{2}/ +signature file-json2 { + file-mime "text/json", 1 + file-magic /^(\xef\xbb\xbf)?[\x0d\x0a[:blank:]]*\[[\x0d\x0a[:blank:]]*(((["][^"]{1,}["]|[0-9]{1,}(\.[0-9]{1,})?|true|false)[\x0d\x0a[:blank:]]*,)|\{|\[)[\x0d\x0a[:blank:]]*/ +} + +# Match empty JSON documents. 
+signature file-json3 { + file-mime "text/json", 0 + file-magic /^(\xef\xbb\xbf)?[\x0d\x0a[:blank:]]*(\[\]|\{\})[\x0d\x0a[:blank:]]*$/ +} + +signature file-xml { + file-mime "application/xml", 10 + file-magic /^(\xef\xbb\xbf)?([\x0d\x0a[:blank:]]*()?[\x0d\x0a[:blank:]]*)*<\?xml / +} + +signature file-xhtml { + file-mime "text/html", 100 + file-magic /^(\xef\xbb\xbf)?([\x0d\x0a[:blank:]]*()?[\x0d\x0a[:blank:]]*)*(<\?xml .*\?>)?([\x0d\x0a[:blank:]]*()?[\x0d\x0a[:blank:]]*)*<(![dD][oO][cC][tT][yY][pP][eE] {1,}[hH][tT][mM][lL]|[hH][tT][mM][lL]|[mM][eE][tT][aA] {1,}[hH][tT][tT][pP]-[eE][qQ][uU][iI][vV])/ +} + +signature file-html { + file-mime "text/html", 49 + file-magic /^(\xef\xbb\xbf)?([\x0d\x0a[:blank:]]*()?[\x0d\x0a[:blank:]]*)*(<\?xml .*\?>)?([\x0d\x0a[:blank:]]*()?[\x0d\x0a[:blank:]]*)*)?[\x0d\x0a[:blank:]]*)*(<\?xml .*\?>)?([\x0d\x0a[:blank:]]*()?[\x0d\x0a[:blank:]]*)*<([hH][eE][aA][dD]|[hH][tT][mM][lL]|[tT][iI][tT][lL][eE]|[bB][oO][dD][yY])/ +} + +signature file-rss { + file-mime "text/rss", 90 + file-magic /^(\xef\xbb\xbf)?([\x0d\x0a[:blank:]]*()?[\x0d\x0a[:blank:]]*)*(<\?xml .*\?>)?([\x0d\x0a[:blank:]]*()?[\x0d\x0a[:blank:]]*)*<[rR][sS][sS]/ +} + +signature file-atom { + file-mime "text/atom", 100 + file-magic /^(\xef\xbb\xbf)?([\x0d\x0a[:blank:]]*()?[\x0d\x0a[:blank:]]*)*(<\?xml .*\?>)?([\x0d\x0a[:blank:]]*()?[\x0d\x0a[:blank:]]*)*<([rR][sS][sS][^>]*xmlns:atom|[fF][eE][eE][dD][^>]*xmlns=["']?http:\/\/www.w3.org\/2005\/Atom["']?)/ +} + +signature file-soap { + file-mime "application/soap+xml", 49 + file-magic /^(\xef\xbb\xbf)?([\x0d\x0a[:blank:]]*()?[\x0d\x0a[:blank:]]*)*(<\?xml .*\?>)?([\x0d\x0a[:blank:]]*()?[\x0d\x0a[:blank:]]*)*<[sS][oO][aA][pP](-[eE][nN][vV])?:[eE][nN][vV][eE][lL][oO][pP][eE]/ +} + +signature file-cross-domain-policy { + file-mime "text/x-cross-domain-policy", 49 + file-magic /^([\x0d\x0a[:blank:]]*()?[\x0d\x0a[:blank:]]*)*(<\?xml .*\?>)?([\x0d\x0a[:blank:]]*()?[\x0d\x0a[:blank:]]*)*)?[\x0d\x0a[:blank:]]*)*(<\?xml 
.*\?>)?([\x0d\x0a[:blank:]]*()?[\x0d\x0a[:blank:]]*)*<[cC][rR][oO][sS][sS]-[dD][oO][mM][aA][iI][nN]-[pP][oO][lL][iI][cC][yY]/ +} + +signature file-xmlrpc { + file-mime "application/xml-rpc", 49 + file-magic /^(\xef\xbb\xbf)?([\x0d\x0a[:blank:]]*()?[\x0d\x0a[:blank:]]*)*(<\?xml .*\?>)?([\x0d\x0a[:blank:]]*()?[\x0d\x0a[:blank:]]*)*<[mM][eE][tT][hH][oO][dD][rR][eE][sS][pP][oO][nN][sS][eE]>/ +} + +signature file-coldfusion { + file-mime "magnus-internal/cold-fusion", 20 + file-magic /^([\x0d\x0a[:blank:]]*()?)*<(CFPARAM|CFSET|CFIF)/ +} + +# Microsoft LNK files +signature file-lnk { + file-mime "application/x-ms-shortcut", 49 + file-magic /^\x4C\x00\x00\x00\x01\x14\x02\x00\x00\x00\x00\x00\xC0\x00\x00\x00\x00\x10\x00\x00\x00\x46/ } signature file-jar { @@ -21,8 +90,20 @@ signature file-jar { } signature file-java-applet { - file-magic /^\xca\xfe\xba\xbe...[\x2e-\x34]/ file-mime "application/x-java-applet", 71 + file-magic /^\xca\xfe\xba\xbe...[\x2d-\x34]/ +} + +# OCSP requests over HTTP. +signature file-ocsp-request { + file-magic /^.{11,19}\x06\x05\x2b\x0e\x03\x02\x1a/ + file-mime "application/ocsp-request", 71 +} + +# OCSP responses over HTTP. +signature file-ocsp-response { + file-magic /^.{11,19}\x06\x09\x2B\x06\x01\x05\x05\x07\x30\x01\x01/ + file-mime "application/ocsp-response", 71 } # Shockwave flash @@ -37,12 +118,6 @@ signature file-tnef { file-mime "application/vnd.ms-tnef", 100 } -# Mac OS X DMG files -signature file-dmg { - file-magic /^(\x78\x01\x73\x0D\x62\x62\x60|\x78\xDA\x63\x60\x18\x05|\x78\x01\x63\x60\x18\x05|\x78\xDA\x73\x0D|\x78[\x01\xDA]\xED[\xD0-\xD9])/ - file-mime "application/x-dmg", 100 -} - # Mac OS X Mach-O executable signature file-mach-o { file-magic /^[\xce\xcf]\xfa\xed\xfe/ @@ -55,13 +130,6 @@ signature file-mach-o-universal { file-mime "application/x-mach-o-executable", 100 } -# XAR (eXtensible ARchive) format. -# Mac OS X uses this for the .pkg format. 
-signature file-xar { - file-magic /^xar\!/ - file-mime "application/x-xar", 100 -} - signature file-pkcs7 { file-magic /^MIME-Version:.*protocol=\"application\/pkcs7-signature\"/ file-mime "application/pkcs7-signature", 100 @@ -79,16 +147,6 @@ signature file-jnlp { file-mime "application/x-java-jnlp-file", 100 } -signature file-ico { - file-magic /^\x00\x00\x01\x00/ - file-mime "image/x-icon", 70 -} - -signature file-cur { - file-magic /^\x00\x00\x02\x00/ - file-mime "image/x-cursor", 70 -} - signature file-pcap { file-magic /^(\xa1\xb2\xc3\xd4|\xd4\xc3\xb2\xa1)/ file-mime "application/vnd.tcpdump.pcap", 70 @@ -119,7 +177,58 @@ signature file-python { file-mime "text/x-python", 60 } +signature file-awk { + file-mime "text/x-awk", 60 + file-magic /^\x23\x21[^\n]{1,15}bin\/(env[[:space:]]+)?(g|n)?awk/ +} + +signature file-tcl { + file-mime "text/x-tcl", 60 + file-magic /^\x23\x21[^\n]{1,15}bin\/(env[[:space:]]+)?(wish|tcl)/ +} + +signature file-lua { + file-mime "text/x-lua", 49 + file-magic /^\x23\x21[^\n]{1,15}bin\/(env[[:space:]]+)?lua/ +} + +signature file-javascript { + file-mime "application/javascript", 60 + file-magic /^\x23\x21[^\n]{1,15}bin\/(env[[:space:]]+)?node(js)?/ +} + +signature file-javascript2 { + file-mime "application/javascript", 60 + file-magic /^[\x0d\x0a[:blank:]]*<[sS][cC][rR][iI][pP][tT][[:blank:]]+([tT][yY][pP][eE]|[lL][aA][nN][gG][uU][aA][gG][eE])=['"]?([tT][eE][xX][tT]\/)?[jJ][aA][vV][aA][sS][cC][rR][iI][pP][tT]/ +} + +signature file-javascript3 { + file-mime "application/javascript", 60 + # This seems to be a somewhat common idiom in javascript. 
+ file-magic /^[\x0d\x0a[:blank:]]*for \(;;\);/ +} + +signature file-javascript4 { + file-mime "application/javascript", 60 + file-magic /^[\x0d\x0a[:blank:]]*document\.write(ln)?[[:blank:]]?\(/ +} + +signature file-javascript5 { + file-mime "application/javascript", 60 + file-magic /^\(function\(\)[[:blank:]\n]*\{/ +} + +signature file-javascript6 { + file-mime "application/javascript", 60 + file-magic /^[\x0d\x0a[:blank:]]*