Merge remote-tracking branch 'origin/topic/seth/faf-updates'

* origin/topic/seth/faf-updates: (27 commits)
  Undoing the FTP tests I updated earlier.
  Update the last two btest FAF tests.
  File analysis fixes and test updates.
  Fix a bug with getting analyzer tags.
  A few test updates.
  Some tests work now (at least they all don't fail anymore!)
  Forgot a file.
  Added protocol description functions that provide a super compressed log representation.
  Fix a bug where orig file information in http wasn't working right.
  Added mime types to http.log
  Clean up queued but unused file_over_new_connections event args.
  Add jar files to the default MHR lookups.
  Adding CAB files for MHR checking.
  Improve malware hash registry script.
  Fix a small issue with finding smtp entities.
  Added support for files to the notice framework.
  Make the custom libmagic database a git submodule.
  Add an is_orig parameter to file_over_new_connection event.
  Make magic for emitting application/msword mime type less strict.
  Disable more libmagic builtin checks that override the magic database.
  ...

Conflicts:
	doc/scripts/DocSourcesList.cmake
	scripts/base/init-bare.bro
	scripts/test-all-policy.bro
	testing/btest/Baseline/coverage.bare-load-baseline/canonified_loaded_scripts.log
	testing/btest/Baseline/coverage.default-load-baseline/canonified_loaded_scripts.log
This commit is contained in:
Robin Sommer 2013-07-29 14:21:52 -07:00
commit 984e9793db
196 changed files with 1548 additions and 5033 deletions

3
.gitmodules vendored
View file

@ -16,3 +16,6 @@
[submodule "cmake"] [submodule "cmake"]
path = cmake path = cmake
url = git://git.bro-ids.org/cmake url = git://git.bro-ids.org/cmake
[submodule "magic"]
path = magic
url = git://git.bro.org/bromagic

View file

@ -18,7 +18,7 @@ get_filename_component(BRO_SCRIPT_INSTALL_PATH ${BRO_SCRIPT_INSTALL_PATH}
ABSOLUTE) ABSOLUTE)
set(BRO_MAGIC_INSTALL_PATH ${BRO_ROOT_DIR}/share/bro/magic) set(BRO_MAGIC_INSTALL_PATH ${BRO_ROOT_DIR}/share/bro/magic)
set(BRO_MAGIC_SOURCE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/magic) set(BRO_MAGIC_SOURCE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/magic/database)
configure_file(bro-path-dev.in ${CMAKE_CURRENT_BINARY_DIR}/bro-path-dev) configure_file(bro-path-dev.in ${CMAKE_CURRENT_BINARY_DIR}/bro-path-dev)
file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/bro-path-dev.sh file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/bro-path-dev.sh
@ -201,9 +201,8 @@ CheckOptionalBuildSources(aux/broctl Broctl INSTALL_BROCTL)
CheckOptionalBuildSources(aux/bro-aux Bro-Aux INSTALL_AUX_TOOLS) CheckOptionalBuildSources(aux/bro-aux Bro-Aux INSTALL_AUX_TOOLS)
CheckOptionalBuildSources(aux/broccoli Broccoli INSTALL_BROCCOLI) CheckOptionalBuildSources(aux/broccoli Broccoli INSTALL_BROCCOLI)
install(DIRECTORY ./magic/ DESTINATION ${BRO_MAGIC_INSTALL_PATH} FILES_MATCHING install(DIRECTORY ./magic/database/
PATTERN "COPYING" EXCLUDE DESTINATION ${BRO_MAGIC_INSTALL_PATH}
PATTERN "*"
) )
######################################################################## ########################################################################

View file

@ -99,7 +99,7 @@ macro(REST_TARGET srcDir broInput)
COMMAND "${CMAKE_COMMAND}" COMMAND "${CMAKE_COMMAND}"
ARGS -E remove_directory .state ARGS -E remove_directory .state
# generate the reST documentation using bro # generate the reST documentation using bro
COMMAND BROPATH=${BROPATH}:${srcDir} BROMAGIC=${CMAKE_SOURCE_DIR}/magic ${CMAKE_BINARY_DIR}/src/bro COMMAND BROPATH=${BROPATH}:${srcDir} BROMAGIC=${CMAKE_SOURCE_DIR}/magic/database ${CMAKE_BINARY_DIR}/src/bro
ARGS -b -Z ${broInput} || (rm -rf .state *.log *.rst && exit 1) ARGS -b -Z ${broInput} || (rm -rf .state *.log *.rst && exit 1)
# move generated doc into a new directory tree that # move generated doc into a new directory tree that
# defines the final structure of documents # defines the final structure of documents
@ -130,7 +130,7 @@ add_custom_command(OUTPUT proto-analyzers.rst
COMMAND "${CMAKE_COMMAND}" COMMAND "${CMAKE_COMMAND}"
ARGS -E remove_directory .state ARGS -E remove_directory .state
# generate the reST documentation using bro # generate the reST documentation using bro
COMMAND BROPATH=${BROPATH}:${srcDir} BROMAGIC=${CMAKE_SOURCE_DIR}/magic ${CMAKE_BINARY_DIR}/src/bro COMMAND BROPATH=${BROPATH}:${srcDir} BROMAGIC=${CMAKE_SOURCE_DIR}/magic/database ${CMAKE_BINARY_DIR}/src/bro
ARGS -b -Z base/init-bare.bro || (rm -rf .state *.log *.rst && exit 1) ARGS -b -Z base/init-bare.bro || (rm -rf .state *.log *.rst && exit 1)
# move generated doc into a new directory tree that # move generated doc into a new directory tree that
# defines the final structure of documents # defines the final structure of documents

View file

@ -74,6 +74,8 @@ rest_target(${CMAKE_BINARY_DIR}/scripts base/bif/plugins/Bro_ZIP.events.bif.bro)
rest_target(${CMAKE_BINARY_DIR}/scripts base/bif/reporter.bif.bro) rest_target(${CMAKE_BINARY_DIR}/scripts base/bif/reporter.bif.bro)
rest_target(${CMAKE_BINARY_DIR}/scripts base/bif/strings.bif.bro) rest_target(${CMAKE_BINARY_DIR}/scripts base/bif/strings.bif.bro)
rest_target(${CMAKE_BINARY_DIR}/scripts base/bif/types.bif.bro) rest_target(${CMAKE_BINARY_DIR}/scripts base/bif/types.bif.bro)
rest_target(${psd} base/files/extract/main.bro)
rest_target(${psd} base/files/hash/main.bro)
rest_target(${psd} base/frameworks/analyzer/main.bro) rest_target(${psd} base/frameworks/analyzer/main.bro)
rest_target(${psd} base/frameworks/cluster/main.bro) rest_target(${psd} base/frameworks/cluster/main.bro)
rest_target(${psd} base/frameworks/cluster/nodes/manager.bro) rest_target(${psd} base/frameworks/cluster/nodes/manager.bro)
@ -83,7 +85,7 @@ rest_target(${psd} base/frameworks/cluster/setup-connections.bro)
rest_target(${psd} base/frameworks/communication/main.bro) rest_target(${psd} base/frameworks/communication/main.bro)
rest_target(${psd} base/frameworks/control/main.bro) rest_target(${psd} base/frameworks/control/main.bro)
rest_target(${psd} base/frameworks/dpd/main.bro) rest_target(${psd} base/frameworks/dpd/main.bro)
rest_target(${psd} base/frameworks/file-analysis/main.bro) rest_target(${psd} base/frameworks/files/main.bro)
rest_target(${psd} base/frameworks/input/main.bro) rest_target(${psd} base/frameworks/input/main.bro)
rest_target(${psd} base/frameworks/input/readers/ascii.bro) rest_target(${psd} base/frameworks/input/readers/ascii.bro)
rest_target(${psd} base/frameworks/input/readers/benchmark.bro) rest_target(${psd} base/frameworks/input/readers/benchmark.bro)
@ -137,25 +139,22 @@ rest_target(${psd} base/protocols/conn/main.bro)
rest_target(${psd} base/protocols/conn/polling.bro) rest_target(${psd} base/protocols/conn/polling.bro)
rest_target(${psd} base/protocols/dns/consts.bro) rest_target(${psd} base/protocols/dns/consts.bro)
rest_target(${psd} base/protocols/dns/main.bro) rest_target(${psd} base/protocols/dns/main.bro)
rest_target(${psd} base/protocols/ftp/file-analysis.bro) rest_target(${psd} base/protocols/ftp/files.bro)
rest_target(${psd} base/protocols/ftp/file-extract.bro)
rest_target(${psd} base/protocols/ftp/gridftp.bro) rest_target(${psd} base/protocols/ftp/gridftp.bro)
rest_target(${psd} base/protocols/ftp/main.bro) rest_target(${psd} base/protocols/ftp/main.bro)
rest_target(${psd} base/protocols/ftp/utils-commands.bro) rest_target(${psd} base/protocols/ftp/utils-commands.bro)
rest_target(${psd} base/protocols/http/file-analysis.bro) rest_target(${psd} base/protocols/ftp/utils.bro)
rest_target(${psd} base/protocols/http/file-extract.bro) rest_target(${psd} base/protocols/http/entities.bro)
rest_target(${psd} base/protocols/http/file-hash.bro) rest_target(${psd} base/protocols/http/files.bro)
rest_target(${psd} base/protocols/http/file-ident.bro)
rest_target(${psd} base/protocols/http/main.bro) rest_target(${psd} base/protocols/http/main.bro)
rest_target(${psd} base/protocols/http/utils.bro) rest_target(${psd} base/protocols/http/utils.bro)
rest_target(${psd} base/protocols/irc/dcc-send.bro) rest_target(${psd} base/protocols/irc/dcc-send.bro)
rest_target(${psd} base/protocols/irc/file-analysis.bro) rest_target(${psd} base/protocols/irc/files.bro)
rest_target(${psd} base/protocols/irc/main.bro) rest_target(${psd} base/protocols/irc/main.bro)
rest_target(${psd} base/protocols/modbus/consts.bro) rest_target(${psd} base/protocols/modbus/consts.bro)
rest_target(${psd} base/protocols/modbus/main.bro) rest_target(${psd} base/protocols/modbus/main.bro)
rest_target(${psd} base/protocols/smtp/entities-excerpt.bro)
rest_target(${psd} base/protocols/smtp/entities.bro) rest_target(${psd} base/protocols/smtp/entities.bro)
rest_target(${psd} base/protocols/smtp/file-analysis.bro) rest_target(${psd} base/protocols/smtp/files.bro)
rest_target(${psd} base/protocols/smtp/main.bro) rest_target(${psd} base/protocols/smtp/main.bro)
rest_target(${psd} base/protocols/socks/consts.bro) rest_target(${psd} base/protocols/socks/consts.bro)
rest_target(${psd} base/protocols/socks/main.bro) rest_target(${psd} base/protocols/socks/main.bro)
@ -183,6 +182,8 @@ rest_target(${psd} policy/frameworks/control/controllee.bro)
rest_target(${psd} policy/frameworks/control/controller.bro) rest_target(${psd} policy/frameworks/control/controller.bro)
rest_target(${psd} policy/frameworks/dpd/detect-protocols.bro) rest_target(${psd} policy/frameworks/dpd/detect-protocols.bro)
rest_target(${psd} policy/frameworks/dpd/packet-segment-logging.bro) rest_target(${psd} policy/frameworks/dpd/packet-segment-logging.bro)
rest_target(${psd} policy/frameworks/files/detect-MHR.bro)
rest_target(${psd} policy/frameworks/files/hash-all-files.bro)
rest_target(${psd} policy/frameworks/intel/do_notice.bro) rest_target(${psd} policy/frameworks/intel/do_notice.bro)
rest_target(${psd} policy/frameworks/intel/seen/conn-established.bro) rest_target(${psd} policy/frameworks/intel/seen/conn-established.bro)
rest_target(${psd} policy/frameworks/intel/seen/dns.bro) rest_target(${psd} policy/frameworks/intel/seen/dns.bro)
@ -216,7 +217,6 @@ rest_target(${psd} policy/protocols/dns/detect-external-names.bro)
rest_target(${psd} policy/protocols/ftp/detect-bruteforcing.bro) rest_target(${psd} policy/protocols/ftp/detect-bruteforcing.bro)
rest_target(${psd} policy/protocols/ftp/detect.bro) rest_target(${psd} policy/protocols/ftp/detect.bro)
rest_target(${psd} policy/protocols/ftp/software.bro) rest_target(${psd} policy/protocols/ftp/software.bro)
rest_target(${psd} policy/protocols/http/detect-MHR.bro)
rest_target(${psd} policy/protocols/http/detect-sqli.bro) rest_target(${psd} policy/protocols/http/detect-sqli.bro)
rest_target(${psd} policy/protocols/http/detect-webapps.bro) rest_target(${psd} policy/protocols/http/detect-webapps.bro)
rest_target(${psd} policy/protocols/http/header-names.bro) rest_target(${psd} policy/protocols/http/header-names.bro)
@ -228,6 +228,7 @@ rest_target(${psd} policy/protocols/modbus/known-masters-slaves.bro)
rest_target(${psd} policy/protocols/modbus/track-memmap.bro) rest_target(${psd} policy/protocols/modbus/track-memmap.bro)
rest_target(${psd} policy/protocols/smtp/blocklists.bro) rest_target(${psd} policy/protocols/smtp/blocklists.bro)
rest_target(${psd} policy/protocols/smtp/detect-suspicious-orig.bro) rest_target(${psd} policy/protocols/smtp/detect-suspicious-orig.bro)
rest_target(${psd} policy/protocols/smtp/entities-excerpt.bro)
rest_target(${psd} policy/protocols/smtp/software.bro) rest_target(${psd} policy/protocols/smtp/software.bro)
rest_target(${psd} policy/protocols/ssh/detect-bruteforcing.bro) rest_target(${psd} policy/protocols/ssh/detect-bruteforcing.bro)
rest_target(${psd} policy/protocols/ssh/geo-data.bro) rest_target(${psd} policy/protocols/ssh/geo-data.bro)

View file

@ -1,29 +0,0 @@
# $File: LEGAL.NOTICE,v 1.15 2006/05/03 18:48:33 christos Exp $
# Copyright (c) Ian F. Darwin 1986, 1987, 1989, 1990, 1991, 1992, 1994, 1995.
# Software written by Ian F. Darwin and others;
# maintained 1994- Christos Zoulas.
#
# This software is not subject to any export provision of the United States
# Department of Commerce, and may be exported to any country or planet.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# 1. Redistributions of source code must retain the above copyright
# notice immediately at the beginning of the file, without modification,
# this list of conditions, and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
# SUCH DAMAGE.

View file

@ -1,208 +0,0 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File: animation,v 1.47 2013/02/06 14:18:52 christos Exp $
# animation: file(1) magic for animation/movie formats
#
# animation formats
# MPEG, FLI, DL originally from vax@ccwf.cc.utexas.edu (VaX#n8)
# FLC, SGI, Apple originally from Daniel Quinlan (quinlan@yggdrasil.com)
# SGI and Apple formats
0 string MOVI Silicon Graphics movie file
!:mime video/x-sgi-movie
4 string moov Apple QuickTime
!:mime video/quicktime
4 string mdat Apple QuickTime movie (unoptimized)
!:mime video/quicktime
#4 string wide Apple QuickTime movie (unoptimized)
#!:mime video/quicktime
#4 string skip Apple QuickTime movie (modified)
#!:mime video/quicktime
#4 string free Apple QuickTime movie (modified)
#!:mime video/quicktime
4 string idsc Apple QuickTime image (fast start)
!:mime image/x-quicktime
#4 string idat Apple QuickTime image (unoptimized)
#!:mime image/x-quicktime
4 string pckg Apple QuickTime compressed archive
!:mime application/x-quicktime-player
4 string/W jP JPEG 2000 image
!:mime image/jp2
4 string ftyp ISO Media
>8 string isom \b, MPEG v4 system, version 1
!:mime video/mp4
>8 string mp41 \b, MPEG v4 system, version 1
!:mime video/mp4
>8 string mp42 \b, MPEG v4 system, version 2
!:mime video/mp4
>8 string/W jp2 \b, JPEG 2000
!:mime image/jp2
>8 string 3ge \b, MPEG v4 system, 3GPP
!:mime video/3gpp
>8 string 3gg \b, MPEG v4 system, 3GPP
!:mime video/3gpp
>8 string 3gp \b, MPEG v4 system, 3GPP
!:mime video/3gpp
>8 string 3gs \b, MPEG v4 system, 3GPP
!:mime video/3gpp
>8 string 3g2 \b, MPEG v4 system, 3GPP2
!:mime video/3gpp2
>8 string mmp4 \b, MPEG v4 system, 3GPP Mobile
!:mime video/mp4
>8 string avc1 \b, MPEG v4 system, 3GPP JVT AVC
!:mime video/3gpp
>8 string/W M4A \b, MPEG v4 system, iTunes AAC-LC
!:mime audio/mp4
>8 string/W M4V \b, MPEG v4 system, iTunes AVC-LC
!:mime video/mp4
>8 string/W qt \b, Apple QuickTime movie
!:mime video/quicktime
# MPEG sequences
# Scans for all common MPEG header start codes
0 belong&0xFFFFFF00 0x00000100
>3 byte 0xBA MPEG sequence
!:mime video/mpeg
# GRR too general as it also catches FoxPro Memo example NG.FPT
>3 byte 0xB0 MPEG sequence, v4
!:mime video/mpeg4-generic
>3 byte 0xB5 MPEG sequence, v4
!:mime video/mpeg4-generic
>3 byte 0xB3 MPEG sequence
!:mime video/mpeg
# MPEG ADTS Audio (*.mpx/mxa/aac)
# from dreesen@math.fu-berlin.de
# modified to fully support MPEG ADTS
# MP3, M1A
# modified by Joerg Jenderek
# GRR the original tests are too common for many DOS files
# so don't accept as MP3 until we've tested the rate
0 beshort&0xFFFE 0xFFFA
# rates
>2 byte&0xF0 0x10 MPEG ADTS, layer III, v1, 32 kbps
!:mime audio/mpeg
>2 byte&0xF0 0x20 MPEG ADTS, layer III, v1, 40 kbps
!:mime audio/mpeg
>2 byte&0xF0 0x30 MPEG ADTS, layer III, v1, 48 kbps
!:mime audio/mpeg
>2 byte&0xF0 0x40 MPEG ADTS, layer III, v1, 56 kbps
!:mime audio/mpeg
>2 byte&0xF0 0x50 MPEG ADTS, layer III, v1, 64 kbps
!:mime audio/mpeg
>2 byte&0xF0 0x60 MPEG ADTS, layer III, v1, 80 kbps
!:mime audio/mpeg
>2 byte&0xF0 0x70 MPEG ADTS, layer III, v1, 96 kbps
!:mime audio/mpeg
>2 byte&0xF0 0x80 MPEG ADTS, layer III, v1, 112 kbps
!:mime audio/mpeg
>2 byte&0xF0 0x90 MPEG ADTS, layer III, v1, 128 kbps
!:mime audio/mpeg
>2 byte&0xF0 0xA0 MPEG ADTS, layer III, v1, 160 kbps
!:mime audio/mpeg
>2 byte&0xF0 0xB0 MPEG ADTS, layer III, v1, 192 kbps
!:mime audio/mpeg
>2 byte&0xF0 0xC0 MPEG ADTS, layer III, v1, 224 kbps
!:mime audio/mpeg
>2 byte&0xF0 0xD0 MPEG ADTS, layer III, v1, 256 kbps
!:mime audio/mpeg
>2 byte&0xF0 0xE0 MPEG ADTS, layer III, v1, 320 kbps
!:mime audio/mpeg
# MP2, M1A
0 beshort&0xFFFE 0xFFFC MPEG ADTS, layer II, v1
!:mime audio/mpeg
# MP3, M2A
0 beshort&0xFFFE 0xFFF2 MPEG ADTS, layer III, v2
!:mime audio/mpeg
# MPA, M2A
0 beshort&0xFFFE 0xFFF6 MPEG ADTS, layer I, v2
!:mime audio/mpeg
# MP3, M25A
0 beshort&0xFFFE 0xFFE2 MPEG ADTS, layer III, v2.5
!:mime audio/mpeg
# Stored AAC streams (instead of the MP4 format)
0 string ADIF MPEG ADIF, AAC
!:mime audio/x-hx-aac-adif
# Live or stored single AAC stream (used with MPEG-2 systems)
0 beshort&0xFFF6 0xFFF0 MPEG ADTS, AAC
!:mime audio/x-hx-aac-adts
# Live MPEG-4 audio streams (instead of RTP FlexMux)
0 beshort&0xFFE0 0x56E0 MPEG-4 LOAS
!:mime audio/x-mp4a-latm
# This magic isn't strong enough (matches plausible ISO-8859-1 text)
#0 beshort 0x4DE1 MPEG-4 LO-EP audio stream
#!:mime audio/x-mp4a-latm
# Summary: FLI animation format
# Created by: Daniel Quinlan <quinlan@yggdrasil.com>
# Modified by (1): Abel Cheung <abelcheung@gmail.com> (avoid over-generic detection)
4 leshort 0xAF11
# standard FLI always has 320x200 resolution and 8 bit color
>8 leshort 320
>>10 leshort 200
>>>12 leshort 8 FLI animation, 320x200x8
!:mime video/x-fli
# Summary: FLC animation format
# Created by: Daniel Quinlan <quinlan@yggdrasil.com>
# Modified by (1): Abel Cheung <abelcheung@gmail.com> (avoid over-generic detection)
4 leshort 0xAF12
# standard FLC always uses 8 bit color
>12 leshort 8 FLC animation
!:mime video/x-flc
# Microsoft Advanced Streaming Format (ASF) <mpruett@sgi.com>
0 belong 0x3026b275 Microsoft ASF
!:mime video/x-ms-asf
# MNG Video Format, <URL:http://www.libpng.org/pub/mng/spec/>
0 string \x8aMNG MNG video data,
!:mime video/x-mng
# JNG Video Format, <URL:http://www.libpng.org/pub/mng/spec/>
0 string \x8bJNG JNG video data,
!:mime video/x-jng
# VRML (Virtual Reality Modelling Language)
0 string/w #VRML\ V1.0\ ascii VRML 1 file
!:mime model/vrml
0 string/w #VRML\ V2.0\ utf8 ISO/IEC 14772 VRML 97 file
!:mime model/vrml
# X3D (Extensible 3D) [http://www.web3d.org/specifications/x3d-3.0.dtd]
# From Michel Briand <michelbriand@free.fr>
0 string/t \<?xml\ version="
!:strength +1
>20 search/1000/cw \<!DOCTYPE\ X3D X3D (Extensible 3D) model xml text
!:mime model/x3d
# MPEG file
# MPEG sequences
# FIXME: This section is from the old magic.mime file and needs integrating with the rest
0 belong 0x000001BA
>4 byte &0x40
!:mime video/mp2p
>4 byte ^0x40
!:mime video/mpeg
0 belong 0x000001BB
!:mime video/mpeg
0 belong 0x000001B0
!:mime video/mp4v-es
0 belong 0x000001B5
!:mime video/mp4v-es
0 belong 0x000001B3
!:mime video/mpv
0 belong&0xFF5FFF1F 0x47400010
!:mime video/mp2t
0 belong 0x00000001
>4 byte&0x1F 0x07
!:mime video/h264

View file

@ -1,242 +0,0 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File: archive,v 1.78 2013/02/06 14:18:52 christos Exp $
# archive: file(1) magic for archive formats (see also "msdos" for self-
# extracting compressed archives)
#
# cpio, ar, arc, arj, hpack, lha/lharc, rar, squish, uc2, zip, zoo, etc.
# pre-POSIX "tar" archives are handled in the C code.
# POSIX tar archives
257 string ustar\0 POSIX tar archive
!:mime application/x-tar # encoding: posix
257 string ustar\040\040\0 GNU tar archive
!:mime application/x-tar # encoding: gnu
# cpio archives
#
# Yes, the top two "cpio archive" formats *are* supposed to just be "short".
# The idea is to indicate archives produced on machines with the same
# byte order as the machine running "file" with "cpio archive", and
# to indicate archives produced on machines with the opposite byte order
# from the machine running "file" with "byte-swapped cpio archive".
#
# The SVR4 "cpio(4)" hints that there are additional formats, but they
# are defined as "short"s; I think all the new formats are
# character-header formats and thus are strings, not numbers.
0 short 070707 cpio archive
!:mime application/x-cpio
0 short 0143561 byte-swapped cpio archive
!:mime application/x-cpio # encoding: swapped
#
# System V Release 1 portable(?) archive format.
#
0 string =<ar> System V Release 1 ar archive
!:mime application/x-archive
#
# Debian package; it's in the portable archive format, and needs to go
# before the entry for regular portable archives, as it's recognized as
# a portable archive whose first member has a name beginning with
# "debian".
#
0 string =!<arch>\ndebian
!:mime application/x-debian-package
#
# MIPS archive; they're in the portable archive format, and need to go
# before the entry for regular portable archives, as it's recognized as
# a portable archive whose first member has a name beginning with
# "__________E".
#
0 string =!<arch>\n__________E MIPS archive
!:mime application/x-archive
#
# BSD/SVR2-and-later portable archive formats.
#
0 string =!<arch> current ar archive
!:mime application/x-archive
# ARC archiver, from Daniel Quinlan (quinlan@yggdrasil.com)
#
# The first byte is the magic (0x1a), byte 2 is the compression type for
# the first file (0x01 through 0x09), and bytes 3 to 15 are the MS-DOS
# filename of the first file (null terminated). Since some types collide
# we only test some types on basis of frequency: 0x08 (83%), 0x09 (5%),
# 0x02 (5%), 0x03 (3%), 0x04 (2%), 0x06 (2%). 0x01 collides with terminfo.
0 lelong&0x8080ffff 0x0000081a ARC archive data, dynamic LZW
!:mime application/x-arc
0 lelong&0x8080ffff 0x0000091a ARC archive data, squashed
!:mime application/x-arc
0 lelong&0x8080ffff 0x0000021a ARC archive data, uncompressed
!:mime application/x-arc
0 lelong&0x8080ffff 0x0000031a ARC archive data, packed
!:mime application/x-arc
0 lelong&0x8080ffff 0x0000041a ARC archive data, squeezed
!:mime application/x-arc
0 lelong&0x8080ffff 0x0000061a ARC archive data, crunched
!:mime application/x-arc
# [JW] stuff taken from idarc, obviously ARC successors:
0 lelong&0x8080ffff 0x00000a1a PAK archive data
!:mime application/x-arc
0 lelong&0x8080ffff 0x0000141a ARC+ archive data
!:mime application/x-arc
0 lelong&0x8080ffff 0x0000481a HYP archive data
!:mime application/x-arc
# ARJ archiver (jason@jarthur.Claremont.EDU)
0 leshort 0xea60 ARJ archive data
!:mime application/x-arj
# LHARC/LHA archiver (Greg Roelofs, newt@uchicago.edu)
2 string -lh0- LHarc 1.x/ARX archive data [lh0]
!:mime application/x-lharc
2 string -lh1- LHarc 1.x/ARX archive data [lh1]
!:mime application/x-lharc
2 string -lz4- LHarc 1.x archive data [lz4]
!:mime application/x-lharc
2 string -lz5- LHarc 1.x archive data [lz5]
!:mime application/x-lharc
# [never seen any but the last; -lh4- reported in comp.compression:]
2 string -lzs- LHa/LZS archive data [lzs]
!:mime application/x-lha
2 string -lh\40- LHa 2.x? archive data [lh ]
!:mime application/x-lha
2 string -lhd- LHa 2.x? archive data [lhd]
!:mime application/x-lha
2 string -lh2- LHa 2.x? archive data [lh2]
!:mime application/x-lha
2 string -lh3- LHa 2.x? archive data [lh3]
!:mime application/x-lha
2 string -lh4- LHa (2.x) archive data [lh4]
!:mime application/x-lha
2 string -lh5- LHa (2.x) archive data [lh5]
!:mime application/x-lha
2 string -lh6- LHa (2.x) archive data [lh6]
!:mime application/x-lha
2 string -lh7- LHa (2.x)/LHark archive data [lh7]
!:mime application/x-lha
# RAR archiver (Greg Roelofs, newt@uchicago.edu)
0 string Rar! RAR archive data,
!:mime application/x-rar
# PKZIP multi-volume archive
0 string PK\x07\x08PK\x03\x04 Zip multi-volume archive data, at least PKZIP v2.50 to extract
!:mime application/zip
# Zip archives (Greg Roelofs, c/o zip-bugs@wkuvx1.wku.edu)
0 string PK\003\004
# Specialised zip formats which start with a member named 'mimetype'
# (stored uncompressed, with no 'extra field') containing the file's MIME type.
# Check for an 8-byte name, 0-byte extra field, name "mimetype", and
# contents starting with "application/":
>26 string \x8\0\0\0mimetypeapplication/
# OpenDocument formats (for OpenOffice 2.x / StarOffice >= 8)
# http://lists.oasis-open.org/archives/office/200505/msg00006.html
# (mimetype contains "application/vnd.oasis.opendocument.<SUBTYPE>")
>>50 string vnd.oasis.opendocument. OpenDocument
>>>73 string text
>>>>77 byte !0x2d Text
!:mime application/vnd.oasis.opendocument.text
>>>>77 string -template Text Template
!:mime application/vnd.oasis.opendocument.text-template
>>>>77 string -web HTML Document Template
!:mime application/vnd.oasis.opendocument.text-web
>>>>77 string -master Master Document
!:mime application/vnd.oasis.opendocument.text-master
>>>73 string graphics
>>>>81 byte !0x2d Drawing
!:mime application/vnd.oasis.opendocument.graphics
>>>>81 string -template Template
!:mime application/vnd.oasis.opendocument.graphics-template
>>>73 string presentation
>>>>85 byte !0x2d Presentation
!:mime application/vnd.oasis.opendocument.presentation
>>>>85 string -template Template
!:mime application/vnd.oasis.opendocument.presentation-template
>>>73 string spreadsheet
>>>>84 byte !0x2d Spreadsheet
!:mime application/vnd.oasis.opendocument.spreadsheet
>>>>84 string -template Template
!:mime application/vnd.oasis.opendocument.spreadsheet-template
>>>73 string chart
>>>>78 byte !0x2d Chart
!:mime application/vnd.oasis.opendocument.chart
>>>>78 string -template Template
!:mime application/vnd.oasis.opendocument.chart-template
>>>73 string formula
>>>>80 byte !0x2d Formula
!:mime application/vnd.oasis.opendocument.formula
>>>>80 string -template Template
!:mime application/vnd.oasis.opendocument.formula-template
>>>73 string database Database
!:mime application/vnd.oasis.opendocument.database
>>>73 string image
>>>>78 byte !0x2d Image
!:mime application/vnd.oasis.opendocument.image
>>>>78 string -template Template
!:mime application/vnd.oasis.opendocument.image-template
# EPUB (OEBPS) books using OCF (OEBPS Container Format)
# http://www.idpf.org/ocf/ocf1.0/download/ocf10.htm, section 4.
# From: Ralf Brown <ralf.brown@gmail.com>
>0x1E string mimetypeapplication/epub+zip EPUB document
!:mime application/epub+zip
# Catch other ZIP-with-mimetype formats
# In a ZIP file, the bytes immediately after a member's contents are
# always "PK". The 2 regex rules here print the "mimetype" member's
# contents up to the first 'P'. Luckily, most MIME types don't contain
# any capital 'P's. This is a kludge.
# (mimetype contains "application/<OTHER>")
>>50 string !epub+zip
>>>50 string !vnd.oasis.opendocument.
>>>>50 string !vnd.sun.xml.
>>>>>50 string !vnd.kde.
>>>>>>38 regex [!-OQ-~]+ Zip data (MIME type "%s"?)
!:mime application/zip
# (mimetype contents other than "application/*")
>26 string \x8\0\0\0mimetype
>>38 string !application/
>>>38 regex [!-OQ-~]+ Zip data (MIME type "%s"?)
!:mime application/zip
# Java Jar files
>(26.s+30) leshort 0xcafe Java Jar file data (zip)
!:mime application/jar
# Generic zip archives (Greg Roelofs, c/o zip-bugs@wkuvx1.wku.edu)
# Next line excludes specialized formats:
>(26.s+30) leshort !0xcafe
>>26 string !\x8\0\0\0mimetype Zip archive data
!:mime application/zip
# Zoo archiver
20 lelong 0xfdc4a7dc Zoo archive data
!:mime application/x-zoo
# Shell archives
10 string #\ This\ is\ a\ shell\ archive shell archive text
!:mime application/octet-stream
# Felix von Leitner <felix-file@fefe.de>
0 string d8:announce BitTorrent file
!:mime application/x-bittorrent
# EET archive
# From: Tilman Sauerbeck <tilman@code-monkey.de>
0 belong 0x1ee7ff00 EET archive
!:mime application/x-eet
# Symbian installation files
# http://www.thouky.co.uk/software/psifs/sis.html
# http://developer.symbian.com/main/downloads/papers/SymbianOSv91/softwareinstallsis.pdf
8 lelong 0x10000419 Symbian installation file
!:mime application/vnd.symbian.install
0 lelong 0x10201A7A Symbian installation file (Symbian OS 9.x)
!:mime x-epoc/x-sisx-app

View file

@ -1,19 +0,0 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File: assembler,v 1.3 2013/01/04 17:23:28 christos Exp $
# assembler: file(1) magic for assembler source
#
0 regex \^[\020\t]*\\.asciiz assembler source text
!:mime text/x-asm
0 regex \^[\020\t]*\\.byte assembler source text
!:mime text/x-asm
0 regex \^[\020\t]*\\.even assembler source text
!:mime text/x-asm
0 regex \^[\020\t]*\\.globl assembler source text
!:mime text/x-asm
0 regex \^[\020\t]*\\.text assembler source text
!:mime text/x-asm
0 regex \^[\020\t]*\\.file assembler source text
!:mime text/x-asm
0 regex \^[\020\t]*\\.type assembler source text
!:mime text/x-asm

View file

@ -1,149 +0,0 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File: audio,v 1.65 2012/10/31 13:38:40 christos Exp $
# audio: file(1) magic for sound formats (see also "iff")
#
# Jan Nicolai Langfeldt (janl@ifi.uio.no), Dan Quinlan (quinlan@yggdrasil.com),
# and others
#
# Sun/NeXT audio data
0 string .snd Sun/NeXT audio data:
>12 belong 1 8-bit ISDN mu-law,
!:mime audio/basic
>12 belong 2 8-bit linear PCM [REF-PCM],
!:mime audio/basic
>12 belong 3 16-bit linear PCM,
!:mime audio/basic
>12 belong 4 24-bit linear PCM,
!:mime audio/basic
>12 belong 5 32-bit linear PCM,
!:mime audio/basic
>12 belong 6 32-bit IEEE floating point,
!:mime audio/basic
>12 belong 7 64-bit IEEE floating point,
!:mime audio/basic
>12 belong 23 8-bit ISDN mu-law compressed (CCITT G.721 ADPCM voice enc.),
!:mime audio/x-adpcm
# DEC systems (e.g. DECstation 5000) use a variant of the Sun/NeXT format
# that uses little-endian encoding and has a different magic number
0 lelong 0x0064732E DEC audio data:
>12 lelong 1 8-bit ISDN mu-law,
!:mime audio/x-dec-basic
>12 lelong 2 8-bit linear PCM [REF-PCM],
!:mime audio/x-dec-basic
>12 lelong 3 16-bit linear PCM,
!:mime audio/x-dec-basic
>12 lelong 4 24-bit linear PCM,
!:mime audio/x-dec-basic
>12 lelong 5 32-bit linear PCM,
!:mime audio/x-dec-basic
>12 lelong 6 32-bit IEEE floating point,
!:mime audio/x-dec-basic
>12 lelong 7 64-bit IEEE floating point,
!:mime audio/x-dec-basic
>12 lelong 23 8-bit ISDN mu-law compressed (CCITT G.721 ADPCM voice enc.),
!:mime audio/x-dec-basic
# Creative Labs AUDIO stuff
0 string MThd Standard MIDI data
!:mime audio/midi
0 string CTMF Creative Music (CMF) data
!:mime audio/x-unknown
0 string SBI SoundBlaster instrument data
!:mime audio/x-unknown
0 string Creative\ Voice\ File Creative Labs voice data
!:mime audio/x-unknown
# Real Audio (Magic .ra\0375)
0 belong 0x2e7261fd RealAudio sound file
!:mime audio/x-pn-realaudio
0 string .RMF\0\0\0 RealMedia file
!:mime application/vnd.rn-realmedia
# mime types according to http://www.geocities.com/nevilo/mod.htm:
# audio/it .it
# audio/x-zipped-it .itz
# audio/xm fasttracker modules
# audio/x-s3m screamtracker modules
# audio/s3m screamtracker modules
# audio/x-zipped-mod mdz
# audio/mod mod
# audio/x-mod All modules (mod, s3m, 669, mtm, med, xm, it, mdz, stm, itz, xmz, s3z)
#
# Taken from loader code from mikmod version 2.14
# by Steve McIntyre (stevem@chiark.greenend.org.uk)
# <doj@cubic.org> added title printing on 2003-06-24
0 string MAS_UTrack_V00
>14 string >/0 ultratracker V1.%.1s module sound data
!:mime audio/x-mod
#audio/x-tracker-module
0 string Extended\ Module: Fasttracker II module sound data
!:mime audio/x-mod
#audio/x-tracker-module
21 string/c =!SCREAM! Screamtracker 2 module sound data
!:mime audio/x-mod
#audio/x-screamtracker-module
21 string BMOD2STM Screamtracker 2 module sound data
!:mime audio/x-mod
#audio/x-screamtracker-module
1080 string M.K. 4-channel Protracker module sound data
!:mime audio/x-mod
#audio/x-protracker-module
1080 string M!K! 4-channel Protracker module sound data
!:mime audio/x-mod
#audio/x-protracker-module
1080 string FLT4 4-channel Startracker module sound data
!:mime audio/x-mod
#audio/x-startracker-module
1080 string FLT8 8-channel Startracker module sound data
!:mime audio/x-mod
#audio/x-startracker-module
1080 string 4CHN 4-channel Fasttracker module sound data
!:mime audio/x-mod
#audio/x-fasttracker-module
1080 string 6CHN 6-channel Fasttracker module sound data
!:mime audio/x-mod
#audio/x-fasttracker-module
1080 string 8CHN 8-channel Fasttracker module sound data
!:mime audio/x-mod
#audio/x-fasttracker-module
1080 string CD81 8-channel Octalyser module sound data
!:mime audio/x-mod
#audio/x-octalysertracker-module
1080 string OKTA 8-channel Octalyzer module sound data
!:mime audio/x-mod
#audio/x-octalysertracker-module
# Not good enough.
#1082 string CH
#>1080 string >/0 %.2s-channel Fasttracker "oktalyzer" module sound data
1080 string 16CN 16-channel Taketracker module sound data
!:mime audio/x-mod
#audio/x-taketracker-module
1080 string 32CN 32-channel Taketracker module sound data
!:mime audio/x-mod
#audio/x-taketracker-module
# Impulse tracker module (audio/x-it)
0 string IMPM Impulse Tracker module sound data -
!:mime audio/x-mod
# Free lossless audio codec <http://flac.sourceforge.net>
# From: Przemyslaw Augustyniak <silvathraec@rpg.pl>
0 string fLaC FLAC audio bitstream data
!:mime audio/x-flac
# Monkey's Audio compressed audio format (.ape)
# From danny.milo@gmx.net (Danny Milosavljevic)
# New version from Abel Cheung <abel (@) oaka.org>
0 string MAC\040 Monkey's Audio compressed format
!:mime audio/x-ape
# Musepack support. From: "Jiri Pejchal" <jiri.pejchal@gmail.com>
0 string MP+ Musepack audio
!:mime audio/x-musepack

View file

@ -1,47 +0,0 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File: c-lang,v 1.16 2011/12/09 08:02:16 rrt Exp $
# c-lang: file(1) magic for C and related languages programs
#
# BCPL
0 search/8192 "libhdr" BCPL source text
!:mime text/x-bcpl
0 search/8192 "LIBHDR" BCPL source text
!:mime text/x-bcpl
# C
0 regex \^#include C source text
!:mime text/x-c
0 regex \^char C source text
!:mime text/x-c
0 regex \^double C source text
!:mime text/x-c
0 regex \^extern C source text
!:mime text/x-c
0 regex \^float C source text
!:mime text/x-c
0 regex \^struct C source text
!:mime text/x-c
0 regex \^union C source text
!:mime text/x-c
0 search/8192 main( C source text
!:mime text/x-c
# C++
# The strength of these rules is increased so they beat the C rules above
0 regex \^template C++ source text
!:strength + 5
!:mime text/x-c++
0 regex \^virtual C++ source text
!:strength + 5
!:mime text/x-c++
0 regex \^class C++ source text
!:strength + 5
!:mime text/x-c++
0 regex \^public: C++ source text
!:strength + 5
!:mime text/x-c++
0 regex \^private: C++ source text
!:strength + 5
!:mime text/x-c++

View file

@ -1,31 +0,0 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File: cafebabe,v 1.13 2013/02/26 21:04:38 christos Exp $
# Cafe Babes unite!
#
# Since Java bytecode and Mach-O universal binaries have the same magic number,
# the test must be performed in the same "magic" sequence to get both right.
# The long at offset 4 in a Mach-O universal binary tells the number of
# architectures; the short at offset 4 in a Java bytecode file is the JVM minor
# version and the short at offset 6 is the JVM major version. Since there are
# only 18 labeled Mach-O architectures at present, and the first released
# Java class format was version 43.0, we can safely choose any number
# between 18 and 39 to test the number of architectures against
# (and use as a hack). Let's not use 18, because the Mach-O people
# might add another one or two as time goes by...
#
### JAVA START ###
0 belong 0xcafebabe
!:mime application/x-java-applet
0 belong 0xcafed00d JAR compressed with pack200,
>5 byte x version %d.
>4 byte x \b%d
!:mime application/x-java-pack200
0 belong 0xcafed00d JAR compressed with pack200,
>5 byte x version %d.
>4 byte x \b%d
!:mime application/x-java-pack200
### JAVA END ###

View file

@ -1,82 +0,0 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File: commands,v 1.44 2013/02/05 15:20:47 christos Exp $
# commands: file(1) magic for various shells and interpreters
#
#0 string/w : shell archive or script for antique kernel text
0 string/wt #!\ /bin/sh POSIX shell script text executable
!:mime text/x-shellscript
0 string/wt #!\ /bin/csh C shell script text executable
!:mime text/x-shellscript
# korn shell magic, sent by George Wu, gwu@clyde.att.com
0 string/wt #!\ /bin/ksh Korn shell script text executable
!:mime text/x-shellscript
0 string/wt #!\ /bin/tcsh Tenex C shell script text executable
!:mime text/x-shellscript
0 string/wt #!\ /usr/bin/tcsh Tenex C shell script text executable
!:mime text/x-shellscript
0 string/wt #!\ /usr/local/tcsh Tenex C shell script text executable
!:mime text/x-shellscript
0 string/wt #!\ /usr/local/bin/tcsh Tenex C shell script text executable
!:mime text/x-shellscript
#
# zsh/ash/ae/nawk/gawk magic from cameron@cs.unsw.oz.au (Cameron Simpson)
0 string/wt #!\ /bin/zsh Paul Falstad's zsh script text executable
!:mime text/x-shellscript
0 string/wt #!\ /usr/bin/zsh Paul Falstad's zsh script text executable
!:mime text/x-shellscript
0 string/wt #!\ /usr/local/bin/zsh Paul Falstad's zsh script text executable
!:mime text/x-shellscript
0 string/wt #!\ /usr/local/bin/ash Neil Brown's ash script text executable
!:mime text/x-shellscript
0 string/wt #!\ /usr/local/bin/ae Neil Brown's ae script text executable
!:mime text/x-shellscript
0 string/wt #!\ /bin/nawk new awk script text executable
!:mime text/x-nawk
0 string/wt #!\ /usr/bin/nawk new awk script text executable
!:mime text/x-nawk
0 string/wt #!\ /usr/local/bin/nawk new awk script text executable
!:mime text/x-nawk
0 string/wt #!\ /bin/gawk GNU awk script text executable
!:mime text/x-gawk
0 string/wt #!\ /usr/bin/gawk GNU awk script text executable
!:mime text/x-gawk
0 string/wt #!\ /usr/local/bin/gawk GNU awk script text executable
!:mime text/x-gawk
#
0 string/wt #!\ /bin/awk awk script text executable
!:mime text/x-awk
0 string/wt #!\ /usr/bin/awk awk script text executable
!:mime text/x-awk
# bash shell magic, from Peter Tobias (tobias@server.et-inf.fho-emden.de)
0 string/wt #!\ /bin/bash Bourne-Again shell script text executable
!:mime text/x-shellscript
0 string/wt #!\ /usr/bin/bash Bourne-Again shell script text executable
!:mime text/x-shellscript
0 string/wt #!\ /usr/local/bash Bourne-Again shell script text executable
!:mime text/x-shellscript
0 string/wt #!\ /usr/local/bin/bash Bourne-Again shell script text executable
!:mime text/x-shellscript
# PHP scripts
# Ulf Harnhammar <ulfh@update.uu.se>
0 search/1/c =<?php PHP script text
!:strength + 10
!:mime text/x-php
0 search/1 =<?\n PHP script text
!:mime text/x-php
0 search/1 =<?\r PHP script text
!:mime text/x-php
0 search/1/w #!\ /usr/local/bin/php PHP script text executable
!:strength + 10
!:mime text/x-php
0 search/1/w #!\ /usr/bin/php PHP script text executable
!:strength + 10
!:mime text/x-php
# Smarty compiled template, http://www.smarty.net/
# Elan Ruusamae <glen@delfi.ee>
0 string =<?php\ /*\ Smarty\ version Smarty compiled template
>24 regex [0-9.]+ \b, version %s
!:mime text/x-php

View file

@ -1,77 +0,0 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File: compress,v 1.48 2011/12/07 18:39:43 christos Exp $
# compress: file(1) magic for pure-compression formats (no archives)
#
# compress, gzip, pack, compact, huf, squeeze, crunch, freeze, yabba, etc.
#
# Formats for various forms of compressed data
# Formats for "compress" proper have been moved into "compress.c",
# because it tries to uncompress it to figure out what's inside.
# standard unix compress
0 string \037\235 compress'd data
!:mime application/x-compress
!:apple LZIVZIVU
# gzip (GNU zip, not to be confused with Info-ZIP or PKWARE zip archiver)
# Edited by Chris Chittleborough <cchittleborough@yahoo.com.au>, March 2002
# * Original filename is only at offset 10 if "extra field" absent
# * Produce shorter output - notably, only report compression methods
# other than 8 ("deflate", the only method defined in RFC 1952).
0 string \037\213 gzip compressed data
!:mime application/x-gzip
# packed data, Huffman (minimum redundancy) codes on a byte-by-byte basis
0 string \037\036 packed data
!:mime application/octet-stream
#
# This magic number is byte-order-independent.
0 short 0x1f1f old packed data
!:mime application/octet-stream
# XXX - why *two* entries for "compacted data", one of which is
# byte-order independent, and one of which is byte-order dependent?
#
0 short 0x1fff compacted data
!:mime application/octet-stream
# This string is valid for SunOS (BE) and a matching "short" is listed
# in the Ultrix (LE) magic file.
0 string \377\037 compacted data
!:mime application/octet-stream
0 short 0145405 huf output
!:mime application/octet-stream
# bzip2
0 string BZh bzip2 compressed data
!:mime application/x-bzip2
# lzip
0 string LZIP lzip compressed data
!:mime application/x-lzip
# 7-zip archiver, from Thomas Klausner (wiz@danbala.tuwien.ac.at)
# http://www.7-zip.org or DOC/7zFormat.txt
#
0 string 7z\274\257\047\034 7-zip archive data,
>6 byte x version %d
>7 byte x \b.%d
!:mime application/x-7z-compressed
# Type: LZMA
0 lelong&0xffffff =0x5d
>12 leshort =0xff LZMA compressed data,
>>5 lequad =0xffffffffffffffff streamed
>>5 lequad !0xffffffffffffffff non-streamed, size %lld
!:mime application/x-lzma
# http://tukaani.org/xz/xz-file-format.txt
0 ustring \xFD7zXZ\x00 XZ compressed data
!:mime application/x-xz
# https://github.com/ckolivas/lrzip/blob/master/doc/magic.header.txt
0 string LRZI LRZIP compressed data
>4 byte x - version %d
>5 byte x \b.%d
!:mime application/x-lrzip

View file

@ -1,47 +0,0 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File: database,v 1.32 2013/02/06 14:18:52 christos Exp $
# database: file(1) magic for various databases
#
# extracted from header/code files by Graeme Wilford (eep2gw@ee.surrey.ac.uk)
#
#
# GDBM magic numbers
# Will be maintained as part of the GDBM distribution in the future.
# <downsj@teeny.org>
0 belong 0x13579ace GNU dbm 1.x or ndbm database, big endian
!:mime application/x-gdbm
0 lelong 0x13579ace GNU dbm 1.x or ndbm database, little endian
!:mime application/x-gdbm
0 string GDBM GNU dbm 2.x database
!:mime application/x-gdbm
#
# Berkeley DB
#
# Ian Darwin's file /etc/magic files: big/little-endian version.
#
# Hash 1.85/1.86 databases store metadata in network byte order.
# Btree 1.85/1.86 databases store the metadata in host byte order.
# Hash and Btree 2.X and later databases store the metadata in host byte order.
0 long 0x00061561 Berkeley DB
!:mime application/x-dbm
# MS Access database
4 string Standard\ Jet\ DB Microsoft Access Database
!:mime application/x-msaccess
4 string Standard\ ACE\ DB Microsoft Access Database
!:mime application/x-msaccess
# Tokyo Cabinet magic data
# http://tokyocabinet.sourceforge.net/index.html
0 string ToKyO\ CaBiNeT\n Tokyo Cabinet
>14 string x \b (%s)
>32 byte 0 \b, Hash
!:mime application/x-tokyocabinet-hash
>32 byte 1 \b, B+ tree
!:mime application/x-tokyocabinet-btree
>32 byte 2 \b, Fixed-length
!:mime application/x-tokyocabinet-fixed
>32 byte 3 \b, Table
!:mime application/x-tokyocabinet-table

View file

@ -1,25 +0,0 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File: diff,v 1.13 2012/06/16 14:43:36 christos Exp $
# diff: file(1) magic for diff(1) output
#
0 search/1 diff\ diff output text
!:mime text/x-diff
0 search/1 ***\ diff output text
!:mime text/x-diff
0 search/1 Only\ in\ diff output text
!:mime text/x-diff
0 search/1 Common\ subdirectories:\ diff output text
!:mime text/x-diff
0 search/1 Index: RCS/CVS diff output text
!:mime text/x-diff
# unified diff
0 search/4096 ---\
>&0 search/1024 \n
>>&0 search/1 +++\
>>>&0 search/1024 \n
>>>>&0 search/1 @@ unified diff output text
!:mime text/x-diff
!:strength + 90

View file

@ -1,43 +0,0 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# elf: file(1) magic for ELF executables
#
# We have to check the byte order flag to see what byte order all the
# other stuff in the header is in.
#
# What're the correct byte orders for the nCUBE and the Fujitsu VPP500?
#
# Created by: unknown
# Modified by (1): Daniel Quinlan <quinlan@yggdrasil.com>
# Modified by (2): Peter Tobias <tobias@server.et-inf.fho-emden.de> (core support)
# Modified by (3): Christian 'Dr. Disk' Hechelmann <drdisk@ds9.au.s.shuttle.de> (fix of core support)
# Modified by (4): <gerardo.cacciari@gmail.com> (VMS Itanium)
# Modified by (5): Matthias Urlichs <smurf@debian.org> (Listing of many architectures)
0 string \177ELF ELF
>4 byte 0 invalid class
>4 byte 1 32-bit
>4 byte 2 64-bit
>5 byte 0 invalid byte order
>5 byte 1 LSB
>>16 leshort 0 no file type,
!:strength *2
!:mime application/octet-stream
>>16 leshort 1 relocatable,
!:mime application/x-object
>>16 leshort 2 executable,
!:mime application/x-executable
>>16 leshort 3 shared object,
!:mime application/x-sharedlib
>>16 leshort 4 core file
!:mime application/x-coredump
>5 byte 2 MSB
>>16 beshort 0 no file type,
!:mime application/octet-stream
>>16 beshort 1 relocatable,
!:mime application/x-object
>>16 beshort 2 executable,
!:mime application/x-executable
>>16 beshort 3 shared object,
!:mime application/x-sharedlib
>>16 beshort 4 core file,
!:mime application/x-coredump

View file

@ -1,34 +0,0 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File: epoc,v 1.7 2009/09/19 16:28:09 christos Exp $
# EPOC : file(1) magic for EPOC documents [Psion Series 5/Osaris/Geofox 1]
# Stefan Praszalowicz <hpicollo@worldnet.fr> and Peter Breitenlohner <peb@mppmu.mpg.de>
# Useful information for improving this file can be found at:
# http://software.frodo.looijaard.name/psiconv/formats/Index.html
#------------------------------------------------------------------------------
0 lelong 0x10000037 Psion Series 5
>4 lelong 0x10000042 multi-bitmap image
!:mime image/x-epoc-mbm
>4 lelong 0x1000006D
>>8 lelong 0x1000007D Sketch image
!:mime image/x-epoc-sketch
>>8 lelong 0x1000007F Word file
!:mime application/x-epoc-word
>>8 lelong 0x10000085 OPL program (TextEd)
!:mime application/x-epoc-opl
>>8 lelong 0x10000088 Sheet file
!:mime application/x-epoc-sheet
>4 lelong 0x10000073 OPO module
!:mime application/x-epoc-opo
>4 lelong 0x10000074 OPL application
!:mime application/x-epoc-app
0 lelong 0x10000050 Psion Series 5
>4 lelong 0x1000006D database
>>8 lelong 0x10000084 Agenda file
!:mime application/x-epoc-agenda
>>8 lelong 0x10000086 Data file
!:mime application/x-epoc-data
>>8 lelong 0x10000CEA Jotter file
!:mime application/x-epoc-jotter

View file

@ -1,12 +0,0 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File: filesystems,v 1.76 2013/02/18 18:45:41 christos Exp $
# filesystems: file(1) magic for different filesystems
#
# CDROM Filesystems
# Modified for UDF by gerardo.cacciari@gmail.com
32769 string CD001 #
!:mime application/x-iso9660-image
37633 string CD001 ISO 9660 CD-ROM filesystem data (raw 2352 byte sectors)
!:mime application/x-iso9660-image

View file

@ -1,18 +0,0 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File: flash,v 1.8 2009/09/19 16:28:09 christos Exp $
# flash: file(1) magic for Macromedia Flash file format
#
# See
#
# http://www.macromedia.com/software/flash/open/
#
0 string FWS Macromedia Flash data,
>3 byte x version %d
!:mime application/x-shockwave-flash
0 string CWS Macromedia Flash data (compressed),
!:mime application/x-shockwave-flash
# From: Cal Peake <cp@absolutedigital.net>
0 string FLV Macromedia Flash Video
!:mime video/x-flv

View file

@ -1,32 +0,0 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File: fonts,v 1.25 2013/02/06 14:18:52 christos Exp $
# fonts: file(1) magic for font data
#
# X11 font files in SNF (Server Natural Format) format
# updated by Joerg Jenderek in Feb 2013
# http://computer-programming-forum.com/51-perl/8f22fb96d2e34bab.htm
0 belong 00000004 X11 SNF font data, MSB first
#>104 belong 00000004 X11 SNF font data, MSB first
!:mime application/x-font-sfn
# GRR: the line below is too general, as it also catches the Xbase index file t3-CHAR.NDX
0 lelong 00000004
>104 lelong 00000004 X11 SNF font data, LSB first
!:mime application/x-font-sfn
# True Type fonts
0 string \000\001\000\000\000 TrueType font data
!:mime application/x-font-ttf
# Opentype font data from Avi Bercovich
0 string OTTO OpenType font data
!:mime application/vnd.ms-opentype
# Gurkan Sengun <gurkan@linuks.mine.nu>, www.linuks.mine.nu
0 string SplineFontDB: Spline Font Database
!:mime application/vnd.font-fontforge-sfd
# EOT
34 string LP Embedded OpenType (EOT)
!:mime application/vnd.ms-fontobject

View file

@ -1,7 +0,0 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File: fortran,v 1.6 2009/09/19 16:28:09 christos Exp $
# FORTRAN source
0 regex/100 \^[Cc][\ \t] FORTRAN program
!:mime text/x-fortran
!:strength - 5

View file

@ -1,31 +0,0 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File$
# frame: file(1) magic for FrameMaker files
#
# This stuff came on a FrameMaker demo tape, most of which is
# copyright, but this file is "published" as witness the following:
#
# Note that this is the Framemaker Maker Interchange Format, not the
# Normal format which would be application/vnd.framemaker.
#
0 string \<MakerFile FrameMaker document
!:mime application/x-mif
0 string \<MIFFile FrameMaker MIF (ASCII) file
!:mime application/x-mif
0 search/1 \<MakerDictionary FrameMaker Dictionary text
!:mime application/x-mif
0 string \<MakerScreenFont FrameMaker Font file
!:mime application/x-mif
0 string \<MML FrameMaker MML file
!:mime application/x-mif
0 string \<BookFile FrameMaker Book file
!:mime application/x-mif
# XXX - this book entry should be verified, if you find one, uncomment this
#0 string \<Book\ FrameMaker Book (ASCII) file
#!:mime application/x-mif
#>6 string 3.0 (3.0)
#>6 string 2.0 (2.0)
#>6 string 1.0 (1.0)
0 string \<Maker Intermediate Print File FrameMaker IPL file
!:mime application/x-mif

View file

@ -1,13 +0,0 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File: gimp,v 1.6 2009/09/19 16:28:09 christos Exp $
# GIMP Gradient: file(1) magic for the GIMP's gradient data files
# by Federico Mena <federico@nuclecu.unam.mx>
#------------------------------------------------------------------------------
# XCF: file(1) magic for the XCF image format used in the GIMP developed
# by Spencer Kimball and Peter Mattis
# ('Bucky' LaDieu, nega@vt.edu)
0 string gimp\ xcf GIMP XCF image data,
!:mime image/x-xcf

View file

@ -1,23 +0,0 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File: gnu,v 1.13 2012/01/03 17:16:54 christos Exp $
# gnu: file(1) magic for various GNU tools
#
# GNU nlsutils message catalog file format
#
# GNU message catalog (.mo and .gmo files)
# GnuPG
# The format is very similar to pgp
# Note: magic.mime had 0x8501 for the next line instead of 0x8502
0 beshort 0x8502 GPG encrypted data
!:mime text/PGP # encoding: data
# This magic is not particularly good, as the keyrings don't have true
# magic. Nevertheless, it covers many keyrings.
0 beshort 0x9901 GPG key public ring
!:mime application/x-gnupg-keyring
# gettext message catalogue
0 regex \^msgid\ GNU gettext message catalogue text
!:mime text/x-po

View file

@ -1,8 +0,0 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File$
# gnumeric: file(1) magic for Gnumeric spreadsheet
# This entry is only semi-helpful, as Gnumeric compresses its files, so
# they will ordinarily be reported as "compressed", but at least -z helps
39 string =<gmr:Workbook Gnumeric spreadsheet
!:mime application/x-gnumeric

View file

@ -1,51 +0,0 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File$
# icc: file(1) magic for International Color Consortium file formats
#
# Color profiles as per the ICC's "Image technology colour management -
# Architecture, profile format, and data structure" specification.
# See
#
# http://www.color.org/specification/ICC1v43_2010-12.pdf
#
# for Specification ICC.1:2010 (Profile version 4.3.0.0).
#
# Bytes 36 to 39 contain a generic profile file signature of "acsp";
# bytes 40 to 43 "may be used to identify the primary platform/operating
# system framework for which the profile was created".
#
# There are other fields that might be worth dumping as well.
#
# This appears to be what's used for Apple ColorSync profiles.
# Instead of adding that, Apple just changed the generic "acsp" entry
# to be for "ColorSync ICC Color Profile" rather than "Kodak Color
# Management System, ICC Profile".
# Yes, it's "APPL", not "AAPL"; see the spec.
36 string acspAPPL ColorSync ICC Profile
!:mime application/vnd.iccprofile
# Microsoft ICM color profile
36 string acspMSFT Microsoft ICM Color Profile
!:mime application/vnd.iccprofile
# Yes, that's a blank after "SGI".
36 string acspSGI\ SGI ICC Profile
!:mime application/vnd.iccprofile
# XXX - is this what's used for the Sun KCMS or not? The standard file
# uses just "acsp" for that, but Apple's file uses it for "ColorSync",
# and there *is* an identified "primary platform" value of SUNW.
36 string acspSUNW Sun KCMS ICC Profile
!:mime application/vnd.iccprofile
# Any other profile.
# XXX - should we use "acsp\0\0\0\0" for "no primary platform" profiles,
# and use "acsp" for everything else and dump the "primary platform"
# string in those cases?
36 string acsp ICC Profile
!:mime application/vnd.iccprofile

View file

@ -1,21 +0,0 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File: iff,v 1.12 2009/09/19 16:28:09 christos Exp $
# iff: file(1) magic for Interchange File Format (see also "audio" & "images")
#
# Daniel Quinlan (quinlan@yggdrasil.com) -- IFF was designed by Electronic
# Arts for file interchange. It has also been used by Apple, SGI, and
# especially Commodore-Amiga.
#
# IFF files begin with an 8 byte FORM header, followed by a 4 character
# FORM type, which is followed by the first chunk in the FORM.
0 string FORM IFF data
#>4 belong x \b, FORM is %d bytes long
# audio formats
>8 string AIFF \b, AIFF audio
!:mime audio/x-aiff
>8 string AIFC \b, AIFF-C compressed audio
!:mime audio/x-aiff
>8 string 8SVX \b, 8SVX 8-bit sampled sound voice
!:mime audio/x-aiff

View file

@ -1,255 +0,0 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File: images,v 1.80 2013/02/06 14:18:52 christos Exp $
# images: file(1) magic for image formats (see also "iff", and "c-lang" for
# XPM bitmaps)
#
# originally from jef@helios.ee.lbl.gov (Jef Poskanzer),
# additions by janl@ifi.uio.no as well as others. Jan also suggested
# merging several one- and two-line files into here.
#
# little magic: PCX (first byte is 0x0a)
# PBMPLUS images
# The next byte following the magic is always whitespace.
# strength is changed to try these patterns before "x86 boot sector"
0 search/1 P1
>3 regex =[0-9]*\ [0-9]* Netpbm PBM image text
>3 regex =[0-9]+\ \b, size = %sx
>>3 regex =\ [0-9]+ \b%s
!:strength + 45
!:mime image/x-portable-bitmap
0 search/1 P2
>3 regex =[0-9]*\ [0-9]* Netpbm PGM image text
>3 regex =[0-9]+\ \b, size = %sx
>>3 regex =\ [0-9]+ \b%s
!:strength + 45
!:mime image/x-portable-greymap
0 search/1 P3 Netpbm PPM image text
>3 regex =[0-9]*\ [0-9]* Netpbm PPM image text
>3 regex =[0-9]+\ \b, size = %sx
>>3 regex =\ [0-9]+ \b%s
!:strength + 45
!:mime image/x-portable-pixmap
0 string P4
>3 regex =[0-9]*\ [0-9]* Netpbm PBM "rawbits" image data
>3 regex =[0-9]+\ \b, size = %sx
>>3 regex =\ [0-9]+ \b%s
!:strength + 45
!:mime image/x-portable-bitmap
0 string P5
>3 regex =[0-9]*\ [0-9]* Netpbm PGM "rawbits" image data
>3 regex =[0-9]+\ \b, size = %sx
>>3 regex =\ [0-9]+ \b%s
!:strength + 45
!:mime image/x-portable-greymap
0 string P6
>3 regex =[0-9]*\ [0-9]* Netpbm PPM "rawbits" image data
>3 regex =[0-9]+\ \b, size = %sx
>>3 regex =\ [0-9]+ \b%s
!:strength + 45
!:mime image/x-portable-pixmap
0 string P7 Netpbm PAM image file
!:mime image/x-portable-pixmap
# NIFF (Navy Interchange File Format, a modification of TIFF) images
# [GRR: this *must* go before TIFF]
0 string IIN1 NIFF image data
!:mime image/x-niff
# Canon RAW version 1 (CRW) files are a type of Canon Image File Format
# (CIFF) file. These are apparently all little-endian.
# From: Adam Buchbinder <adam.buchbinder@gmail.com>
# URL: http://www.sno.phy.queensu.ca/~phil/exiftool/canon_raw.html
0 string II\x1a\0\0\0HEAPCCDR Canon CIFF raw image data
!:mime image/x-canon-crw
# Canon RAW version 2 (CR2) files are a kind of TIFF with an extra magic
# number. Put this above the TIFF test to make sure we detect them.
# These are apparently all little-endian.
# From: Adam Buchbinder <adam.buchbinder@gmail.com>
# URL: http://libopenraw.freedesktop.org/wiki/Canon_CR2
0 string II\x2a\0\x10\0\0\0CR Canon CR2 raw image data
!:mime image/x-canon-cr2
# Tag Image File Format, from Daniel Quinlan (quinlan@yggdrasil.com)
# The second word of TIFF files is the TIFF version number, 42, which has
# never changed. The TIFF specification recommends testing for it.
0 string MM\x00\x2a TIFF image data, big-endian
!:mime image/tiff
0 string II\x2a\x00 TIFF image data, little-endian
!:mime image/tiff
0 string MM\x00\x2b Big TIFF image data, big-endian
!:mime image/tiff
0 string II\x2b\x00 Big TIFF image data, little-endian
!:mime image/tiff
# PNG [Portable Network Graphics, or "PNG's Not GIF"] images
# (Greg Roelofs, newt@uchicago.edu)
# (Albert Cahalan, acahalan@cs.uml.edu)
#
# 137 P N G \r \n ^Z \n [4-byte length] I H D R [IHDR data] [IHDR crc] ...
#
0 string \x89PNG\x0d\x0a\x1a\x0a PNG image data
!:mime image/png
# possible GIF replacements; none yet released!
# (Greg Roelofs, newt@uchicago.edu)
#
# GRR 950115: this was mine ("Zip GIF"):
0 string GIF94z ZIF image (GIF+deflate alpha)
!:mime image/x-unknown
#
# GRR 950115: this is Jeremy Wohl's Free Graphics Format (better):
#
0 string FGF95a FGF image (GIF+deflate beta)
!:mime image/x-unknown
#
# GRR 950115: this is Thomas Boutell's Portable Bitmap Format proposal
# (best; not yet implemented):
#
0 string PBF PBF image (deflate compression)
!:mime image/x-unknown
# GIF
0 string GIF8 GIF image data
!:mime image/gif
!:apple 8BIMGIFf
# From: Joerg Jenderek <joerg.jen.der.ek@gmx.net>
# most files with the extension .EPA and some with .BMP
0 string \x11\x06 Award BIOS Logo, 136 x 84
!:mime image/x-award-bioslogo
0 string \x11\x09 Award BIOS Logo, 136 x 126
!:mime image/x-award-bioslogo
#0 string \x07\x1f BIOS Logo corrupted?
# http://www.blackfiveservices.co.uk/awbmtools.shtml
# http://biosgfx.narod.ru/v3/
# http://biosgfx.narod.ru/abr-2/
0 string AWBM
>4 leshort <1981 Award BIOS bitmap
!:mime image/x-award-bmp
# PC bitmaps (OS/2, Windows BMP files) (Greg Roelofs, newt@uchicago.edu)
0 string BM
>14 leshort 12 PC bitmap, OS/2 1.x format
!:mime image/x-ms-bmp
>14 leshort 64 PC bitmap, OS/2 2.x format
!:mime image/x-ms-bmp
>14 leshort 40 PC bitmap, Windows 3.x format
!:mime image/x-ms-bmp
>14 leshort 128 PC bitmap, Windows NT/2000 format
!:mime image/x-ms-bmp
# XPM icons (Greg Roelofs, newt@uchicago.edu)
0 search/1 /*\ XPM\ */ X pixmap image text
!:mime image/x-xpmi
# DICOM medical imaging data
128 string DICM DICOM medical imaging data
!:mime application/dicom
# XWD - X Window Dump file.
# As described in /usr/X11R6/include/X11/XWDFile.h
# used by the xwd program.
# Bradford Castalia, idaeim, 1/01
# updated by Adam Buchbinder, 2/09
# The following assumes version 7 of the format; the first long is the length
# of the header, which is at least 25 4-byte longs, and the one at offset 8
# is a constant which is always either 1 or 2. Offset 12 is the pixmap depth,
# which is a maximum of 32.
0 belong >100
>8 belong <3
>>12 belong <33
>>>4 belong 7 XWD X Window Dump image data
!:mime image/x-xwindowdump
# PCX image files
# From: Dan Fandrich <dan@coneharvesters.com>
# updated by Joerg Jenderek in Feb 2013, based on http://de.wikipedia.org/wiki/PCX
# http://web.archive.org/web/20100206055706/http://www.qzx.com/pc-gpe/pcx.txt
# GRR: original test was still too general as it catches xbase examples T5.DBT,T6.DBT with 0xa000000
# test for bytes 0x0a,version byte (0,2,3,4,5),compression byte flag(0,1), bit depth (>0) of PCX or T5.DBT,T6.DBT
0 ubelong&0xffF8fe00 0x0a000000
# for PCX bit depth > 0
>3 ubyte >0
# test for valid versions
>>1 ubyte <6
>>>1 ubyte !1 PCX
!:mime image/x-pcx
# Adobe Photoshop
# From: Asbjoern Sloth Toennesen <asbjorn@lila.io>
0 string 8BPS Adobe Photoshop Image
!:mime image/vnd.adobe.photoshop
# Summary: DjVu image / document
# Extension: .djvu
# Reference: http://djvu.org/docs/DjVu3Spec.djvu
# Submitted by: Stephane Loeuillet <stephane.loeuillet@tiscali.fr>
# Modified by (1): Abel Cheung <abelcheung@gmail.com>
0 string AT&TFORM
>12 string DJVM DjVu multiple page document
!:mime image/vnd.djvu
>12 string DJVU DjVu image or single page document
!:mime image/vnd.djvu
>12 string DJVI DjVu shared document
!:mime image/vnd.djvu
>12 string THUM DjVu page thumbnails
!:mime image/vnd.djvu
# Originally by Marc Espie
# Modified by Robert Minsk <robertminsk at yahoo.com>
# http://www.openexr.com/openexrfilelayout.pdf
0 lelong 20000630 OpenEXR image data,
!:mime image/x-exr
# SMPTE Digital Picture Exchange Format, SMPTE DPX
#
# ANSI/SMPTE 268M-1994, SMPTE Standard for File Format for Digital
# Moving-Picture Exchange (DPX), v1.0, 18 February 1994
# Robert Minsk <robertminsk at yahoo.com>
0 string SDPX DPX image data, big-endian,
!:mime image/x-dpx
#-----------------------------------------------------------------------
# Hierarchical Data Format, used to facilitate scientific data exchange
# specifications at http://hdf.ncsa.uiuc.edu/
0 belong 0x0e031301 Hierarchical Data Format (version 4) data
!:mime application/x-hdf
0 string \211HDF\r\n\032\n Hierarchical Data Format (version 5) data
!:mime application/x-hdf
# http://www.cartesianinc.com/Tech/
0 string CPC\262 Cartesian Perceptual Compression image
!:mime image/x-cpi
# Polar Monitor Bitmap (.pmb) used as logo for Polar Electro watches
# From: Markus Heidelberg <markus.heidelberg at web.de>
0 string/t [BitmapInfo2] Polar Monitor Bitmap text
!:mime image/x-polar-monitor-bitmap
# Type: Olympus ORF raw images.
# URL: http://libopenraw.freedesktop.org/wiki/Olympus_ORF
# From: Adam Buchbinder <adam.buchbinder@gmail.com>
0 string MMOR Olympus ORF raw image data, big-endian
!:mime image/x-olympus-orf
0 string IIRO Olympus ORF raw image data, little-endian
!:mime image/x-olympus-orf
0 string IIRS Olympus ORF raw image data, little-endian
!:mime image/x-olympus-orf
# Type: Foveon X3F
# URL: http://www.photofo.com/downloads/x3f-raw-format.pdf
# From: Adam Buchbinder <adam.buchbinder@gmail.com>
# Note that the MIME type isn't defined anywhere that I can find; if
# there's a canonical type for this format, it should replace this one.
0 string FOVb Foveon X3F raw image data
!:mime image/x-x3f
# Paint.NET file
# From Adam Buchbinder <adam.buchbinder@gmail.com>
0 string PDN3 Paint.NET image data
!:mime image/x-paintnet

View file

@ -1,16 +0,0 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------
# $File: java,v 1.13 2011/12/08 12:12:46 rrt Exp $
# Java ByteCode and Mach-O binaries (e.g., Mac OS X) use the
# same magic number, 0xcafebabe, so they are both handled
# in the entry called "cafebabe".
#------------------------------------------------------------
0 belong 0xfeedfeed Java KeyStore
!:mime application/x-java-keystore
0 belong 0xcececece Java JCE KeyStore
!:mime application/x-java-jce-keystore
# Java source
0 regex ^import.*;$ Java source
!:mime text/x-java

View file

@ -1,17 +0,0 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File: $
# javascript: magic for javascript and node.js scripts.
#
0 search/1/w #!/bin/node Node.js script text executable
!:mime application/javascript
0 search/1/w #!/usr/bin/node Node.js script text executable
!:mime application/javascript
0 search/1/w #!/bin/nodejs Node.js script text executable
!:mime application/javascript
0 search/1/w #!/usr/bin/nodejs Node.js script text executable
!:mime application/javascript
0 search/1 #!/usr/bin/env\ node Node.js script text executable
!:mime application/javascript
0 search/1 #!/usr/bin/env\ nodejs Node.js script text executable
!:mime application/javascript

View file

@ -1,31 +0,0 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File: jpeg,v 1.18 2012/08/01 12:12:36 christos Exp $
# JPEG images
# SunOS 5.5.1 had
#
# 0 string \377\330\377\340 JPEG file
# 0 string \377\330\377\356 JPG file
#
# both of which turn into "JPEG image data" here.
#
0 beshort 0xffd8 JPEG image data
!:mime image/jpeg
!:apple 8BIMJPEG
!:strength +2
# From: David Santinoli <david@santinoli.com>
0 string \x00\x00\x00\x0C\x6A\x50\x20\x20\x0D\x0A\x87\x0A JPEG 2000
# From: Johan van der Knijff <johan.vanderknijff@kb.nl>
# Added sub-entries for JP2, JPX, JPM and MJ2 formats; added mimetypes
# https://github.com/bitsgalore/jp2kMagic
#
# Now read value of 'Brand' field, which yields a few possibilities:
>20 string \x6a\x70\x32\x20 Part 1 (JP2)
!:mime image/jp2
>20 string \x6a\x70\x78\x20 Part 2 (JPX)
!:mime image/jpx
>20 string \x6a\x70\x6d\x20 Part 6 (JPM)
!:mime image/jpm
>20 string \x6d\x6a\x70\x32 Part 3 (MJ2)
!:mime video/mj2

View file

@ -1,11 +0,0 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File: kde,v 1.4 2009/09/19 16:28:10 christos Exp $
# kde: file(1) magic for KDE
0 string/t [KDE\ Desktop\ Entry] KDE desktop entry
!:mime application/x-kdelnk
0 string/t #\ KDE\ Config\ File KDE config file
!:mime application/x-kdelnk
0 string/t #\ xmcd xmcd database file for kscd
!:mime text/x-xmcd

View file

@ -1,30 +0,0 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File: kml,v 1.2 2009/09/19 16:28:10 christos Exp $
# Type: Google KML, formerly Keyhole Markup Language
# Future development of this format has been handed
# over to the Open Geospatial Consortium.
# http://www.opengeospatial.org/standards/kml/
# From: Asbjoern Sloth Toennesen <asbjorn@lila.io>
0 string/t \<?xml
>20 search/400 \ xmlns=
>>&0 regex ['"]http://earth.google.com/kml Google KML document
!:mime application/vnd.google-earth.kml+xml
#------------------------------------------------------------------------------
# Type: OpenGIS KML, formerly Keyhole Markup Language
# This standard is maintained by the
# Open Geospatial Consortium.
# http://www.opengeospatial.org/standards/kml/
# From: Asbjoern Sloth Toennesen <asbjorn@lila.io>
>>&0 regex ['"]http://www.opengis.net/kml OpenGIS KML document
!:mime application/vnd.google-earth.kml+xml
#------------------------------------------------------------------------------
# Type: Google KML Archive (ZIP based)
# http://code.google.com/apis/kml/documentation/kml_tut.html
# From: Asbjoern Sloth Toennesen <asbjorn@lila.io>
0 string PK\003\004
>4 byte 0x14
>>30 string doc.kml Compressed Google KML Document, including resources.
!:mime application/vnd.google-earth.kmz

View file

@ -1,22 +0,0 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File: linux,v 1.46 2013/01/06 21:26:48 christos Exp $
# linux: file(1) magic for Linux files
#
# Values for Linux/i386 binaries, from Daniel Quinlan <quinlan@yggdrasil.com>
# The following basic Linux magic is useful for reference, but using
# "long" magic is a better practice in order to avoid collisions.
#
# 2 leshort 100 Linux/i386
# >0 leshort 0407 impure executable (OMAGIC)
# >0 leshort 0410 pure executable (NMAGIC)
# >0 leshort 0413 demand-paged executable (ZMAGIC)
# >0 leshort 0314 demand-paged executable (QMAGIC)
#
# SYSLINUX boot logo files (from 'ppmtolss16' sources)
# http://www.syslinux.org/wiki/index.php/SYSLINUX#Display_graphic_from_filename:
# file extension .lss .16
0 lelong =0x1413f33d SYSLINUX' LSS16 image data
# syslinux-4.05/mime/image/x-lss16.xml
!:mime image/x-lss16

View file

@ -1,42 +0,0 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File$
# lisp: file(1) magic for lisp programs
#
# various lisp types, from Daniel Quinlan (quinlan@yggdrasil.com)
# updated by Joerg Jenderek
# GRR: This lot is too weak
#0 string ;;
# windows INF files often begin with semicolon and use CRLF as line end
# lisp files are mainly created on unix system with LF as line end
#>2 search/4096 !\r Lisp/Scheme program text
#>2 search/4096 \r Windows INF file
0 search/4096 (setq\ Lisp/Scheme program text
!:mime text/x-lisp
0 search/4096 (defvar\ Lisp/Scheme program text
!:mime text/x-lisp
0 search/4096 (defparam\ Lisp/Scheme program text
!:mime text/x-lisp
0 search/4096 (defun\ Lisp/Scheme program text
!:mime text/x-lisp
0 search/4096 (autoload\ Lisp/Scheme program text
!:mime text/x-lisp
0 search/4096 (custom-set-variables\ Lisp/Scheme program text
!:mime text/x-lisp
# Emacs 18 - this is always correct, but not very magical.
0 string \012( Emacs v18 byte-compiled Lisp data
!:mime application/x-elc
# Emacs 19+ - ver. recognition added by Ian Springer
# Also applies to XEmacs 19+ .elc files; could tell them apart with regexes
# - Chris Chittleborough <cchittleborough@yahoo.com.au>
# The byte at offset 4 is printed as the version number. The upper-bound
# test is nested under the lower-bound test so that both must hold
# (18 < version < 32); as sibling tests the ">18" check had no effect on
# whether the description and MIME type were emitted.
0 string ;ELC
>4 byte >18
>>4 byte <32 Emacs/XEmacs v%d byte-compiled Lisp data
!:mime application/x-elc
# From: David Allouche <david@allouche.net>
0 search/1 \<TeXmacs| TeXmacs document text
!:mime text/texmacs

View file

@ -1,17 +0,0 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File: lua,v 1.5 2009/09/19 16:28:10 christos Exp $
# lua: file(1) magic for Lua scripting language
# URL: http://www.lua.org/
# From: Reuben Thomas <rrt@sc3d.org>, Seo Sanghyeon <tinuviel@sparcs.kaist.ac.kr>
# Lua scripts
0 search/1/w #!\ /usr/bin/lua Lua script text executable
!:mime text/x-lua
0 search/1/w #!\ /usr/local/bin/lua Lua script text executable
!:mime text/x-lua
0 search/1 #!/usr/bin/env\ lua Lua script text executable
!:mime text/x-lua
0 search/1 #!\ /usr/bin/env\ lua Lua script text executable
!:mime text/x-lua

View file

@ -1,7 +0,0 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File$
# m4: file(1) magic for M4 scripts
#
0 regex \^dnl\ M4 macro processor script text
!:mime text/x-m4

View file

@ -1,21 +0,0 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File: macintosh,v 1.21 2010/09/20 19:19:17 rrt Exp $
# macintosh description
#
# BinHex is the Macintosh ASCII-encoded file format (see also "apple")
# Daniel Quinlan, quinlan@yggdrasil.com
11 string must\ be\ converted\ with\ BinHex BinHex binary text
!:mime application/mac-binhex40
# Stuffit archives are the de facto standard of compression for Macintosh
# files obtained from most archives. (franklsm@tuns.ca)
0 string SIT! StuffIt Archive (data)
!:mime application/x-stuffit
!:apple SIT!SIT!
# Newer StuffIt archives (grant@netbsd.org)
0 string StuffIt StuffIt Archive
!:mime application/x-stuffit
!:apple SIT!SIT!
#>162 string >0 : %s

View file

@ -1,35 +0,0 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File: mail.news,v 1.21 2012/06/21 01:44:52 christos Exp $
# mail.news: file(1) magic for mail and news
#
# Unfortunately, saved netnews also has From line added in some news software.
#0 string From mail text
0 string/t Relay-Version: old news text
!:mime message/rfc822
0 string/t #!\ rnews batched news text
!:mime message/rfc822
0 string/t N#!\ rnews mailed, batched news text
!:mime message/rfc822
0 string/t Forward\ to mail forwarding text
!:mime message/rfc822
0 string/t Pipe\ to mail piping text
!:mime message/rfc822
0 string/tc delivered-to: SMTP mail text
!:mime message/rfc822
0 string/tc return-path: SMTP mail text
!:mime message/rfc822
0 string/t Path: news text
!:mime message/news
0 string/t Xref: news text
!:mime message/news
0 string/t From: news or mail text
!:mime message/rfc822
0 string/t Article saved news text
!:mime message/news
0 string/t Received: RFC 822 mail text
!:mime message/rfc822
# TNEF files...
0 lelong 0x223E9F78 Transport Neutral Encapsulation Format
!:mime application/vnd.ms-tnef

View file

@ -1,16 +0,0 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File$
# make: file(1) magic for makefiles
#
0 regex \^CFLAGS makefile script text
!:mime text/x-makefile
0 regex \^LDFLAGS makefile script text
!:mime text/x-makefile
0 regex \^all: makefile script text
!:mime text/x-makefile
# The dot is escaped so that only a literal ".PRECIOUS" at the start of a
# line matches; an unescaped '.' is a regex wildcard and accepts any character.
0 regex \^\\.PRECIOUS makefile script text
!:mime text/x-makefile
0 regex \^SUBDIRS automake makefile script text
!:mime text/x-makefile

View file

@ -1,29 +0,0 @@
# See COPYING file in this directory for original libmagic copyright.
#--------------------------------------------
# marc21: file(1) magic for MARC 21 Format
#
# Kevin Ford (kefo@loc.gov)
#
# MARC21 formats are for the representation and communication
# of bibliographic and related information in machine-readable
# form. For more info, see http://www.loc.gov/marc/
# leader position 20-21 must be 45
20 string 45
# leader starts with 5 digits, followed by codes specific to MARC format
# Every record-type test below is a continuation ('>') of the position-20
# check above, so none of them can match unless the leader carries "45".
>0 regex/1 (^[0-9]{5})[acdnp][^bhlnqsu-z] MARC21 Bibliographic
!:mime application/marc
>0 regex/1 (^[0-9]{5})[acdnosx][z] MARC21 Authority
!:mime application/marc
>0 regex/1 (^[0-9]{5})[cdn][uvxy] MARC21 Holdings
!:mime application/marc
>0 regex/1 (^[0-9]{5})[acdn][w] MARC21 Classification
!:mime application/marc
>0 regex/1 (^[0-9]{5})[cdn][q] MARC21 Community
!:mime application/marc
# leader position 22-23, should be "00" but is it?
>0 regex/1 (^.{21})([^0]{2}) (non-conforming)
!:mime application/marc

View file

@ -1,17 +0,0 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File: matroska,v 1.7 2012/08/26 10:06:15 christos Exp $
# matroska: file(1) magic for Matroska files
#
# See http://www.matroska.org/
#
# EBML id:
0 belong 0x1a45dfa3
# DocType id:
>4 search/4096 \x42\x82
# DocType contents:
>>&1 string webm WebM
!:mime video/webm
>>&1 string matroska Matroska data
!:mime video/x-matroska

View file

@ -1,9 +0,0 @@
# See COPYING file in this directory for original libmagic copyright.
#-----------------------------------------------------------------------------
# $File: misctools,v 1.12 2010/09/29 18:36:49 rrt Exp $
# misctools: file(1) magic for miscellaneous UNIX tools.
#
0 string/c BEGIN:VCALENDAR vCalendar calendar file
!:mime text/calendar
0 string/c BEGIN:VCARD vCard visiting card
!:mime text/x-vcard

View file

@ -1,368 +0,0 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File: msdos,v 1.84 2013/02/05 13:55:22 christos Exp $
# msdos: file(1) magic for MS-DOS files
#
# .BAT files (Daniel Quinlan, quinlan@yggdrasil.com)
# updated by Joerg Jenderek at Oct 2008,Apr 2011
0 string/t @
>1 string/cW \ echo\ off DOS batch file text
!:mime text/x-msdos-batch
>1 string/cW echo\ off DOS batch file text
!:mime text/x-msdos-batch
>1 string/cW rem DOS batch file text
!:mime text/x-msdos-batch
>1 string/cW set\ DOS batch file text
!:mime text/x-msdos-batch
# Tests for various EXE types.
#
# Many of the compressed formats were extracted from IDARC 1.23 source code.
#
0 string/b MZ DOS MZ
!:mime application/x-dosexec
# All non-DOS EXE extensions have the relocation table more than 0x40 bytes into the file.
>0x18 leshort <0x40 MS-DOS executable
# These traditional tests usually work but not always. When test quality support is
# implemented these can be turned on.
#>>0x18 leshort 0x1c (Borland compiler)
#>>0x18 leshort 0x1e (MS compiler)
# If the relocation table is 0x40 or more bytes into the file, it's definitely
# not a DOS EXE.
>0x18 leshort >0x3f
# Maybe it's a PE?
>>(0x3c.l) string PE\0\0 PE
>>>(0x3c.l+24) leshort 0x010b \b32 executable
>>>(0x3c.l+24) leshort 0x020b \b32+ executable
>>>(0x3c.l+24) leshort 0x0107 ROM image
>>>(0x3c.l+24) default x Unknown PE signature
>>>>&0 leshort x 0x%x
>>>(0x3c.l+22) leshort&0x2000 >0 (DLL)
>>>(0x3c.l+92) leshort 1 (native)
>>>(0x3c.l+92) leshort 2 (GUI)
>>>(0x3c.l+92) leshort 3 (console)
>>>(0x3c.l+92) leshort 7 (POSIX)
>>>(0x3c.l+92) leshort 9 (Windows CE)
>>>(0x3c.l+92) leshort 10 (EFI application)
>>>(0x3c.l+92) leshort 11 (EFI boot service driver)
>>>(0x3c.l+92) leshort 12 (EFI runtime driver)
>>>(0x3c.l+92) leshort 13 (EFI ROM)
>>>(0x3c.l+92) leshort 14 (XBOX)
>>>(0x3c.l+92) leshort 15 (Windows boot application)
>>>(0x3c.l+92) default x (Unknown subsystem
>>>>&0 leshort x 0x%x)
>>>(0x3c.l+4) leshort 0x14c Intel 80386
>>>(0x3c.l+4) leshort 0x166 MIPS R4000
>>>(0x3c.l+4) leshort 0x168 MIPS R10000
>>>(0x3c.l+4) leshort 0x184 Alpha
>>>(0x3c.l+4) leshort 0x1a2 Hitachi SH3
>>>(0x3c.l+4) leshort 0x1a6 Hitachi SH4
>>>(0x3c.l+4) leshort 0x1c0 ARM
>>>(0x3c.l+4) leshort 0x1c2 ARM Thumb
>>>(0x3c.l+4) leshort 0x1c4 ARMv7 Thumb
>>>(0x3c.l+4) leshort 0x1f0 PowerPC
>>>(0x3c.l+4) leshort 0x200 Intel Itanium
>>>(0x3c.l+4) leshort 0x266 MIPS16
>>>(0x3c.l+4) leshort 0x268 Motorola 68000
>>>(0x3c.l+4) leshort 0x290 PA-RISC
>>>(0x3c.l+4) leshort 0x366 MIPSIV
>>>(0x3c.l+4) leshort 0x466 MIPS16 with FPU
>>>(0x3c.l+4) leshort 0xebc EFI byte code
>>>(0x3c.l+4) leshort 0x8664 x86-64
>>>(0x3c.l+4) leshort 0xc0ee MSIL
>>>(0x3c.l+4) default x Unknown processor type
>>>>&0 leshort x 0x%x
>>>(0x3c.l+22) leshort&0x0200 >0 (stripped to external PDB)
>>>(0x3c.l+22) leshort&0x1000 >0 system file
>>>(0x3c.l+24) leshort 0x010b
>>>>(0x3c.l+232) lelong >0 Mono/.Net assembly
>>>(0x3c.l+24) leshort 0x020b
>>>>(0x3c.l+248) lelong >0 Mono/.Net assembly
# hooray, there's a DOS extender using the PE format, with a valid PE
# executable inside (which just prints a message and exits if run in win)
>>>(8.s*16) string 32STUB \b, 32rtm DOS extender
>>>(8.s*16) string !32STUB \b, for MS Windows
>>>(0x3c.l+0xf8) string UPX0 \b, UPX compressed
>>>(0x3c.l+0xf8) search/0x140 PEC2 \b, PECompact2 compressed
>>>(0x3c.l+0xf8) search/0x140 UPX2
>>>>(&0x10.l+(-4)) string PK\3\4 \b, ZIP self-extracting archive (Info-Zip)
>>>(0x3c.l+0xf8) search/0x140 .idata
>>>>(&0xe.l+(-4)) string PK\3\4 \b, ZIP self-extracting archive (Info-Zip)
>>>>(&0xe.l+(-4)) string ZZ0 \b, ZZip self-extracting archive
>>>>(&0xe.l+(-4)) string ZZ1 \b, ZZip self-extracting archive
>>>(0x3c.l+0xf8) search/0x140 .rsrc
>>>>(&0x0f.l+(-4)) string a\\\4\5 \b, WinHKI self-extracting archive
>>>>(&0x0f.l+(-4)) string Rar! \b, RAR self-extracting archive
>>>>(&0x0f.l+(-4)) search/0x3000 MSCF \b, InstallShield self-extracting archive
>>>>(&0x0f.l+(-4)) search/32 Nullsoft \b, Nullsoft Installer self-extracting archive
>>>(0x3c.l+0xf8) search/0x140 .data
>>>>(&0x0f.l) string WEXTRACT \b, MS CAB-Installer self-extracting archive
>>>(0x3c.l+0xf8) search/0x140 .petite\0 \b, Petite compressed
>>>>(0x3c.l+0xf7) byte x
>>>>>(&0x104.l+(-4)) string =!sfx! \b, ACE self-extracting archive
>>>(0x3c.l+0xf8) search/0x140 .WISE \b, WISE installer self-extracting archive
>>>(0x3c.l+0xf8) search/0x140 .dz\0\0\0 \b, Dzip self-extracting archive
>>>&(0x3c.l+0xf8) search/0x100 _winzip_ \b, ZIP self-extracting archive (WinZip)
>>>&(0x3c.l+0xf8) search/0x100 SharedD \b, Microsoft Installer self-extracting archive
>>>0x30 string Inno \b, InnoSetup self-extracting archive
# Hmm, not a PE but the relocation table is too high for a traditional DOS exe,
# must be one of the unusual subformats.
>>(0x3c.l) string !PE\0\0 MS-DOS executable
>>(0x3c.l) string NE \b, NE
>>>(0x3c.l+0x36) byte 1 for OS/2 1.x
>>>(0x3c.l+0x36) byte 2 for MS Windows 3.x
>>>(0x3c.l+0x36) byte 3 for MS-DOS
>>>(0x3c.l+0x36) byte 4 for Windows 386
>>>(0x3c.l+0x36) byte 5 for Borland Operating System Services
>>>(0x3c.l+0x36) default x
>>>>(0x3c.l+0x36) byte x (unknown OS %x)
>>>(0x3c.l+0x36) byte 0x81 for MS-DOS, Phar Lap DOS extender
>>>(0x3c.l+0x0c) leshort&0x8003 0x8002 (DLL)
>>>(0x3c.l+0x0c) leshort&0x8003 0x8001 (driver)
>>>&(&0x24.s-1) string ARJSFX \b, ARJ self-extracting archive
>>>(0x3c.l+0x70) search/0x80 WinZip(R)\ Self-Extractor \b, ZIP self-extracting archive (WinZip)
>>(0x3c.l) string LX\0\0 \b, LX
>>>(0x3c.l+0x0a) leshort <1 (unknown OS)
>>>(0x3c.l+0x0a) leshort 1 for OS/2
>>>(0x3c.l+0x0a) leshort 2 for MS Windows
>>>(0x3c.l+0x0a) leshort 3 for DOS
>>>(0x3c.l+0x0a) leshort >3 (unknown OS)
>>>(0x3c.l+0x10) lelong&0x28000 =0x8000 (DLL)
>>>(0x3c.l+0x10) lelong&0x20000 >0 (device driver)
>>>(0x3c.l+0x10) lelong&0x300 0x300 (GUI)
>>>(0x3c.l+0x10) lelong&0x28300 <0x300 (console)
>>>(0x3c.l+0x08) leshort 1 i80286
>>>(0x3c.l+0x08) leshort 2 i80386
>>>(0x3c.l+0x08) leshort 3 i80486
>>>(8.s*16) string emx \b, emx
>>>>&1 string x %s
>>>&(&0x54.l-3) string arjsfx \b, ARJ self-extracting archive
# MS Windows system file, supposedly a collection of LE executables
>>(0x3c.l) string W3 \b, W3 for MS Windows
>>(0x3c.l) string LE\0\0 \b, LE executable
>>>(0x3c.l+0x0a) leshort 1
# some DOS extenders use LE files with OS/2 header
>>>>0x240 search/0x100 DOS/4G for MS-DOS, DOS4GW DOS extender
>>>>0x240 search/0x200 WATCOM\ C/C++ for MS-DOS, DOS4GW DOS extender
>>>>0x440 search/0x100 CauseWay\ DOS\ Extender for MS-DOS, CauseWay DOS extender
>>>>0x40 search/0x40 PMODE/W for MS-DOS, PMODE/W DOS extender
>>>>0x40 search/0x40 STUB/32A for MS-DOS, DOS/32A DOS extender (stub)
>>>>0x40 search/0x80 STUB/32C for MS-DOS, DOS/32A DOS extender (configurable stub)
>>>>0x40 search/0x80 DOS/32A for MS-DOS, DOS/32A DOS extender (embedded)
# this is a wild guess; hopefully it is a specific signature
>>>>&0x24 lelong <0x50
>>>>>(&0x4c.l) string \xfc\xb8WATCOM
>>>>>>&0 search/8 3\xdbf\xb9 \b, 32Lite compressed
# another wild guess: if real OS/2 LE executables exist, they probably have higher start EIP
#>>>>(0x3c.l+0x1c) lelong >0x10000 for OS/2
# fails with DOS-Extenders.
>>>(0x3c.l+0x0a) leshort 2 for MS Windows
>>>(0x3c.l+0x0a) leshort 3 for DOS
>>>(0x3c.l+0x0a) leshort 4 for MS Windows (VxD)
>>>(&0x7c.l+0x26) string UPX \b, UPX compressed
>>>&(&0x54.l-3) string UNACE \b, ACE self-extracting archive
# looks like ASCII, probably some embedded copyright message.
# and definitely not NE/LE/LX/PE
>>0x3c lelong >0x20000000
>>>(4.s*512) leshort !0x014c \b, MZ for MS-DOS
# header data too small for extended executable
>2 long !0
>>0x18 leshort <0x40
>>>(4.s*512) leshort !0x014c
>>>>&(2.s-514) string !LE
>>>>>&-2 string !BW \b, MZ for MS-DOS
>>>>&(2.s-514) string LE \b, LE
>>>>>0x240 search/0x100 DOS/4G for MS-DOS, DOS4GW DOS extender
# educated guess since indirection is still not capable enough for complex offset
# calculations (next embedded executable would be at &(&2*512+&0-2)
# I suspect there are only LE executables in these multi-exe files
>>>>&(2.s-514) string BW
>>>>>0x240 search/0x100 DOS/4G ,\b LE for MS-DOS, DOS4GW DOS extender (embedded)
>>>>>0x240 search/0x100 !DOS/4G ,\b BW collection for MS-DOS
# This sequence skips to the first COFF segment, usually .text
>(4.s*512) leshort 0x014c \b, COFF
>>(8.s*16) string go32stub for MS-DOS, DJGPP go32 DOS extender
>>(8.s*16) string emx
>>>&1 string x for DOS, Win or OS/2, emx %s
>>&(&0x42.l-3) byte x
>>>&0x26 string UPX \b, UPX compressed
# and yet another guess: a small .text followed by a large .data is unusual, could be 32lite
>>&0x2c search/0xa0 .text
>>>&0x0b lelong <0x2000
>>>>&0 lelong >0x6000 \b, 32lite compressed
>(8.s*16) string $WdX \b, WDos/X DOS extender
# By now an executable type should have been printed out. The executable
# may be a self-uncompressing archive, so look for evidence of that and
# print it out.
#
# Some signatures below from Greg Roelofs, newt@uchicago.edu.
#
>0x35 string \x8e\xc0\xb9\x08\x00\xf3\xa5\x4a\x75\xeb\x8e\xc3\x8e\xd8\x33\xff\xbe\x30\x00\x05 \b, aPack compressed
>0xe7 string LH/2\ Self-Extract \b, %s
>0x1c string UC2X \b, UCEXE compressed
>0x1c string WWP\ \b, WWPACK compressed
>0x1c string RJSX \b, ARJ self-extracting archive
>0x1c string diet \b, diet compressed
>0x1c string LZ09 \b, LZEXE v0.90 compressed
>0x1c string LZ91 \b, LZEXE v0.91 compressed
>0x1c string tz \b, TinyProg compressed
>0x1e string Copyright\ 1989-1990\ PKWARE\ Inc. Self-extracting PKZIP archive
!:mime application/zip
# Yes, this really is "Copr", not "Corp."
>0x1e string PKLITE\ Copr. Self-extracting PKZIP archive
!:mime application/zip
# winarj stores a message in the stub instead of the sig in the MZ header
>0x20 search/0xe0 aRJsfX \b, ARJ self-extracting archive
>0x20 string AIN
>>0x23 string 2 \b, AIN 2.x compressed
>>0x23 string <2 \b, AIN 1.x compressed
>>0x23 string >2 \b, AIN 1.x compressed
>0x24 string LHa's\ SFX \b, LHa self-extracting archive
!:mime application/x-lha
>0x24 string LHA's\ SFX \b, LHa self-extracting archive
!:mime application/x-lha
>0x24 string \ $ARX \b, ARX self-extracting archive
>0x24 string \ $LHarc \b, LHarc self-extracting archive
>0x20 string SFX\ by\ LARC \b, LARC self-extracting archive
>0x40 string aPKG \b, aPackage self-extracting archive
>0x64 string W\ Collis\0\0 \b, Compack compressed
>0x7a string Windows\ self-extracting\ ZIP \b, ZIP self-extracting archive
>>&0xf4 search/0x140 \x0\x40\x1\x0
>>>(&0.l+(4)) string MSCF \b, WinHKI CAB self-extracting archive
>1638 string -lh5- \b, LHa self-extracting archive v2.13S
>0x17888 string Rar! \b, RAR self-extracting archive
# Skip to the end of the EXE. This will usually work fine in the PE case
# because the MZ image is hardcoded into the toolchain and almost certainly
# won't match any of these signatures.
>(4.s*512) long x
>>&(2.s-517) byte x
>>>&0 string PK\3\4 \b, ZIP self-extracting archive
>>>&0 string Rar! \b, RAR self-extracting archive
>>>&0 string =!\x11 \b, AIN 2.x self-extracting archive
>>>&0 string =!\x12 \b, AIN 2.x self-extracting archive
>>>&0 string =!\x17 \b, AIN 1.x self-extracting archive
>>>&0 string =!\x18 \b, AIN 1.x self-extracting archive
>>>&7 search/400 **ACE** \b, ACE self-extracting archive
>>>&0 search/0x480 UC2SFX\ Header \b, UC2 self-extracting archive
# a few unknown ZIP sfxes, no idea if they are needed or if they are
# already captured by the generic patterns above
>(8.s*16) search/0x20 PKSFX \b, ZIP self-extracting archive (PKZIP)
# TODO: how to add this? >FileSize-34 string Windows\ Self-Installing\ Executable \b, ZIP self-extracting archive
#
# TELVOX Teleinformatica CODEC self-extractor for OS/2:
>49801 string \x79\xff\x80\xff\x76\xff \b, CODEC archive v3.21
>>49824 leshort =1 \b, 1 file
>>49824 leshort >1 \b, %u files
# Popular applications
2080 string Microsoft\ Word\ 6.0\ Document %s
!:mime application/msword
2080 string Documento\ Microsoft\ Word\ 6 Spanish Microsoft Word 6 document data
!:mime application/msword
# Pawel Wiecek <coven@i17linuxb.ists.pwr.wroc.pl> (for polish Word)
2112 string MSWordDoc Microsoft Word document data
!:mime application/msword
#
0 belong 0x31be0000 Microsoft Word Document
!:mime application/msword
#
0 string/b PO^Q` Microsoft Word 6.0 Document
!:mime application/msword
#
0 string/b \376\067\0\043 Microsoft Office Document
!:mime application/msword
0 string/b \333\245-\0\0\0 Microsoft Office Document
!:mime application/msword
512 string/b \354\245\301 Microsoft Word Document
!:mime application/msword
#
# WinWord 2.0 signature. Listed exactly once: a second byte-identical test
# yields an identical result and only adds work for the matcher.
0 string/b \xDB\xA5\x2D\x00 Microsoft WinWord 2.0 Document
!:mime application/msword
#
2080 string Microsoft\ Excel\ 5.0\ Worksheet %s
!:mime application/vnd.ms-excel
2080 string Foglio\ di\ lavoro\ Microsoft\ Exce %s
!:mime application/vnd.ms-excel
#
# Pawel Wiecek <coven@i17linuxb.ists.pwr.wroc.pl> (for polish Excel)
2114 string Biff5 Microsoft Excel 5.0 Worksheet
!:mime application/vnd.ms-excel
# Italian MS-Excel
2121 string Biff5 Microsoft Excel 5.0 Worksheet
!:mime application/vnd.ms-excel
0 string/b \x09\x04\x06\x00\x00\x00\x10\x00 Microsoft Excel Worksheet
!:mime application/vnd.ms-excel
#
0 belong 0x00001a00 Lotus 1-2-3
!:mime application/x-123
#
0 belong 0x00000200 Lotus 1-2-3
!:mime application/x-123
0 string/b WordPro\0 Lotus WordPro
!:mime application/vnd.lotus-wordpro
0 string/b WordPro\r\373 Lotus WordPro
!:mime application/vnd.lotus-wordpro
# Windows icons (Ian Springer <ips@fpk.hp.com>)
0 string/b \000\000\001\000 MS Windows icon resource
!:mime image/x-icon
# .PIF files added by Joerg Jenderek from http://smsoft.ru/en/pifdoc.htm
# only for windows versions equal or greater 3.0
0x171 string MICROSOFT\ PIFEX\0 Windows Program Information File
!:mime application/x-dosexec
# TNEF magic From "Joomy" <joomy@se-ed.net>
# Microsoft Outlook's Transport Neutral Encapsulation Format (TNEF)
# The signature 0x223e9f78 is a 32-bit little-endian value, so it has to be
# read with lelong; a 16-bit leshort read cannot hold the full constant.
0 lelong 0x223e9f78 TNEF
!:mime application/vnd.ms-tnef
#------------------------------------------------------------------------------
# From Stuart Caie <kyzer@4u.net> (developer of cabextract)
# Microsoft Cabinet files
0 string/b MSCF\0\0\0\0 Microsoft Cabinet archive data
!:mime application/vnd.ms-cab-compressed
# from http://filext.com by Derek M Jones <derek@knosof.co.uk>
# False positive with PPT (also currently this string is too long)
#0 string/b \xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x3E\x00\x03\x00\xFE\xFF\x09\x00\x06 Microsoft Installer
0 string/b \320\317\021\340\241\261\032\341 Microsoft Office Document
#>48 byte 0x1B Excel Document
#!:mime application/vnd.ms-excel
>546 string bjbj Microsoft Word Document
!:mime application/msword
>546 string jbjb Microsoft Word Document
!:mime application/msword
0 string/b \224\246\056 Microsoft Word Document
!:mime application/msword
512 string R\0o\0o\0t\0\ \0E\0n\0t\0r\0y Microsoft Word Document
!:mime application/msword
# MS eBook format (.lit)
0 string/b ITOLITLS Microsoft Reader eBook Data
>8 lelong x \b, version %u
!:mime application/x-ms-reader

View file

@ -1,12 +0,0 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------
# $File: java,v 1.12 2009/09/19 16:28:10 christos Exp $
# From: Mikhail Gusarov <dottedmag@dottedmag.net>
# NekoVM (http://nekovm.org/) bytecode
0 string NEKO NekoVM bytecode
>4 lelong x (%d global symbols,
>8 lelong x %d global fields,
>12 lelong x %d bytecode ops)
!:mime application/x-nekovm-bytecode

View file

@ -1,11 +0,0 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File$
# pascal: file(1) magic for Pascal source
#
0 search/8192 (input, Pascal source text
!:mime text/x-pascal
0 regex \^program Pascal source text
!:mime text/x-pascal
0 regex \^record Pascal source text
!:mime text/x-pascal

View file

@ -1,8 +0,0 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File$
# pdf: file(1) magic for Portable Document Format
#
0 string %PDF- PDF document
!:mime application/pdf

View file

@ -1,26 +0,0 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File: perl,v 1.19 2012/06/20 21:16:25 christos Exp $
# perl: file(1) magic for Larry Wall's perl language.
#
# The `eval' lines recognizes an outrageously clever hack.
# Keith Waclena <keith@cerberus.uchicago.edu>
# Send additions to <perl5-porters@perl.org>
0 search/1/w #!\ /bin/perl Perl script text executable
!:mime text/x-perl
0 search/1 eval\ "exec\ /bin/perl Perl script text
!:mime text/x-perl
0 search/1/w #!\ /usr/bin/perl Perl script text executable
!:mime text/x-perl
0 search/1 eval\ "exec\ /usr/bin/perl Perl script text
!:mime text/x-perl
0 search/1/w #!\ /usr/local/bin/perl Perl script text executable
!:mime text/x-perl
0 search/1 eval\ "exec\ /usr/local/bin/perl Perl script text
!:mime text/x-perl
0 search/1 eval\ '(exit\ $?0)'\ &&\ eval\ 'exec Perl script text
!:mime text/x-perl
0 search/1 #!/usr/bin/env\ perl Perl script text executable
!:mime text/x-perl
0 search/1 #!\ /usr/bin/env\ perl Perl script text executable
!:mime text/x-perl

View file

@ -1,27 +0,0 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File$
# pgp: file(1) magic for Pretty Good Privacy
# see http://lists.gnupg.org/pipermail/gnupg-devel/1999-September/016052.html
#
0 beshort 0x9900 PGP key public ring
!:mime application/x-pgp-keyring
0 beshort 0x9501 PGP key security ring
!:mime application/x-pgp-keyring
0 beshort 0x9500 PGP key security ring
!:mime application/x-pgp-keyring
0 beshort 0xa600 PGP encrypted data
#!:mime application/pgp-encrypted
# The generic armored-data test is disabled, so its MIME annotation is
# disabled along with it; left active it binds to the "PGP encrypted data"
# entry above and labels binary data as text/PGP.
#0 string -----BEGIN\040PGP text/PGP armored data
#!:mime text/PGP # encoding: armored data
#>15 string PUBLIC\040KEY\040BLOCK- public key block
#>15 string MESSAGE- message
#>15 string SIGNED\040MESSAGE- signed message
#>15 string PGP\040SIGNATURE- signature
2 string ---BEGIN\ PGP\ PUBLIC\ KEY\ BLOCK- PGP public key block
!:mime application/pgp-keys
0 string -----BEGIN\040PGP\40MESSAGE- PGP message
!:mime application/pgp
0 string -----BEGIN\040PGP\40SIGNATURE- PGP signature
!:mime application/pgp-signature

View file

@ -1,7 +0,0 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File$
# pkgadd: file(1) magic for SysV R4 PKG Datastreams
#
0 string #\ PaCkAgE\ DaTaStReAm pkg Datastream (SVR4)
!:mime application/x-svr4-package

View file

@ -1,14 +0,0 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File: printer,v 1.24 2011/05/08 16:34:51 christos Exp $
# printer: file(1) magic for printer-formatted files
#
# PostScript, updated by Daniel Quinlan (quinlan@yggdrasil.com)
0 string %! PostScript document text
!:mime application/postscript
!:apple ASPSTEXT
# Some PCs have the annoying habit of adding a ^D as a document separator
0 string \004%! PostScript document text
!:mime application/postscript
!:apple ASPSTEXT

View file

@ -1,46 +0,0 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File: python,v 1.21 2012/06/21 01:12:51 christos Exp $
# python: file(1) magic for python
#
0 search/1/w #!\ /usr/bin/python Python script text executable
!:mime text/x-python
0 search/1/w #!\ /usr/local/bin/python Python script text executable
!:mime text/x-python
0 search/1 #!/usr/bin/env\ python Python script text executable
!:mime text/x-python
0 search/1 #!\ /usr/bin/env\ python Python script text executable
!:mime text/x-python
# from module.submodule import func1, func2
0 regex \^from\\s+(\\w|\\.)+\\s+import.*$ Python script text executable
!:mime text/x-python
# def __init__ (self, ...):
0 search/4096 def\ __init__
>&0 search/64 self Python script text executable
!:mime text/x-python
# comments
0 search/4096 '''
>&0 regex .*'''$ Python script text executable
!:mime text/x-python
0 search/4096 """
>&0 regex .*"""$ Python script text executable
!:mime text/x-python
# try:
# except: or finally:
# block
0 search/4096 try:
>&0 regex \^\\s*except.*: Python script text executable
!:mime text/x-python
>&0 search/4096 finally: Python script text executable
!:mime text/x-python
# def name(args, args):
0 regex \^(\ |\\t)*def\ +[a-zA-Z]+
>&0 regex \ *\\(([a-zA-Z]|,|\ )*\\):$ Python script text executable
!:mime text/x-python

View file

@ -1,36 +0,0 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File: riff,v 1.22 2011/09/06 11:00:06 christos Exp $
# riff: file(1) magic for RIFF format
# See
#
# http://www.seanet.com/users/matts/riffmci/riffmci.htm
#
# AVI section extended by Patrik Radman <patrik+file-magic@iki.fi>
#
0 string RIFF RIFF (little-endian) data
# Microsoft WAVE format (*.wav)
>8 string WAVE \b, WAVE audio
!:mime audio/x-wav
# Corel Draw Picture
>8 string CDRA \b, Corel Draw Picture
!:mime image/x-coreldraw
# AVI == Audio Video Interleave
>8 string AVI\040 \b, AVI
!:mime video/x-msvideo
#------------------------------------------------------------------------------
# Sony Wave64
# see http://www.vcs.de/fileadmin/user_upload/MBS/PDF/Whitepaper/Informations_about_Sony_Wave64.pdf
# 128 bit RIFF-GUID { 66666972-912E-11CF-A5D6-28DB04C10000 } in little-endian
0 string riff\x2E\x91\xCF\x11\xA5\xD6\x28\xDB\x04\xC1\x00\x00 Sony Wave64 RIFF data
# 128 bit + total file size (64 bits) so 24 bytes
# then WAVE-GUID { 65766177-ACF3-11D3-8CD1-00C04F8EDB8A }
>24 string wave\xF3\xAC\xD3\x11\x8C\xD1\x00\xC0\x4F\x8E\xDB\x8A \b, WAVE 64 audio
!:mime audio/x-w64
#------------------------------------------------------------------------------
# MBWF/RF64
# see EBU TECH 3306 http://tech.ebu.ch/docs/tech/tech3306-2009.pdf
0 string RF64\xff\xff\xff\xffWAVEds64 MBWF/RF64 audio
!:mime audio/x-wav

View file

@ -1,12 +0,0 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File: rpm,v 1.11 2011/06/14 12:47:41 christos Exp $
#
# RPM: file(1) magic for Red Hat Packages Erik Troan (ewt@redhat.com)
#
0 belong 0xedabeedb RPM
!:mime application/x-rpm
#delta RPM Daniel Novotny (dnovotny@redhat.com)
0 string drpm Delta RPM
!:mime application/x-rpm

View file

@ -1,9 +0,0 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File$
# rtf: file(1) magic for Rich Text Format (RTF)
#
# Duncan P. Simpson, D.P.Simpson@dcs.warwick.ac.uk
#
0 string {\\rtf Rich Text Format data,
!:mime text/rtf

View file

@ -1,28 +0,0 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File: ruby,v 1.4 2010/07/08 20:24:13 christos Exp $
# ruby: file(1) magic for Ruby scripting language
# URL: http://www.ruby-lang.org/
# From: Reuben Thomas <rrt@sc3d.org>
# Ruby scripts
0 search/1/w #!\ /usr/bin/ruby Ruby script text executable
!:mime text/x-ruby
0 search/1/w #!\ /usr/local/bin/ruby Ruby script text executable
!:mime text/x-ruby
0 search/1 #!/usr/bin/env\ ruby Ruby script text executable
!:mime text/x-ruby
0 search/1 #!\ /usr/bin/env\ ruby Ruby script text executable
!:mime text/x-ruby
# What looks like ruby, but does not have a shebang
# (modules and such)
# From: Lubomir Rintel <lkundrak@v3.sk>
0 regex \^[\ \t]*require[\ \t]'[A-Za-z_/]+'
>0 regex include\ [A-Z]|def\ [a-z]|\ do$
>>0 regex \^[\ \t]*end([\ \t]*[;#].*)?$ Ruby script text
!:mime text/x-ruby
0 regex \^[\ \t]*(class|module)[\ \t][A-Z]
>0 regex (modul|includ)e\ [A-Z]|def\ [a-z]
>>0 regex \^[\ \t]*end([\ \t]*[;#].*)?$ Ruby module source text
!:mime text/x-ruby

View file

@ -1,7 +0,0 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File$
# sc: file(1) magic for "sc" spreadsheet
#
38 string Spreadsheet sc spreadsheet file
!:mime application/x-sc

View file

@ -1,82 +0,0 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File: sgml,v 1.28 2012/04/28 21:20:26 christos Exp $
# Type: SVG Vectorial Graphics
# From: Noel Torres <tecnico@ejerciciosresueltos.com>
0 string \<?xml\ version="
>15 string >\0
>>19 search/4096 \<svg SVG Scalable Vector Graphics image
!:mime image/svg+xml
>>19 search/4096 \<gnc-v2 GnuCash file
!:mime application/x-gnucash
# Sitemap file
0 string/t \<?xml\ version="
>15 string >\0
>>19 search/4096 \<urlset XML Sitemap document text
!:mime application/xml-sitemap
# xhtml
0 string/t \<?xml\ version="
>15 string >\0
>>19 search/4096/cWbt \<!doctype\ html XHTML document text
!:mime text/html
0 string/t \<?xml\ version='
>15 string >\0
>>19 search/4096/cWbt \<!doctype\ html XHTML document text
!:mime text/html
0 string/t \<?xml\ version="
>15 string >\0
>>19 search/4096/cWbt \<html broken XHTML document text
!:mime text/html
#------------------------------------------------------------------------------
# sgml: file(1) magic for Standard Generalized Markup Language
# HyperText Markup Language (HTML) is an SGML document type,
# from Daniel Quinlan (quinlan@yggdrasil.com)
# adapted to string extensions by Anthon van der Neut <anthon@mnt.org>
0 search/4096/cWt \<!doctype\ html HTML document text
!:mime text/html
!:strength + 5
0 search/4096/cwt \<head HTML document text
!:mime text/html
!:strength + 5
0 search/4096/cwt \<title HTML document text
!:mime text/html
!:strength + 5
0 search/4096/cwt \<html HTML document text
!:mime text/html
!:strength + 5
0 search/4096/cwt \<script HTML document text
!:mime text/html
!:strength + 5
0 search/4096/cwt \<style HTML document text
!:mime text/html
!:strength + 5
0 search/4096/cwt \<table HTML document text
!:mime text/html
!:strength + 5
0 search/4096/cwt \<a\ href= HTML document text
!:mime text/html
!:strength + 5
# Extensible markup language (XML), a subset of SGML
# from Marc Prud'hommeaux (marc@apocalypse.org)
0 search/1/cwt \<?xml XML document text
!:mime application/xml
!:strength + 5
0 string/t \<?xml\ version\ " XML
!:mime application/xml
!:strength + 5
0 string/t \<?xml\ version=" XML
!:mime application/xml
!:strength + 5
0 string \<?xml\ version=' XML
!:mime application/xml
!:strength + 5
0 search/1/wbt \<?xml XML document text
!:mime application/xml
!:strength - 10
0 search/1/wt \<?XML broken XML document text
!:mime application/xml
!:strength - 10

View file

@ -1,17 +0,0 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# sniffer: file(1) magic for packet capture files
#
# From: guy@alum.mit.edu (Guy Harris)
#
#
# "libpcap" capture files.
# (We call them "tcpdump capture file(s)" for now, as "tcpdump" is
# the main program that uses that format, but there are other programs
# that use "libpcap", or that use the same capture file format.)
#
0 ubelong 0xa1b2c3d4 tcpdump capture file (big-endian)
!:mime application/vnd.tcpdump.pcap
0 ulelong 0xa1b2c3d4 tcpdump capture file (little-endian)
!:mime application/vnd.tcpdump.pcap

View file

@ -1,23 +0,0 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# file: file(1) magic for Tcl scripting language
# URL: http://www.tcl.tk/
# From: gustaf neumann
# Tcl scripts
# Tcl interpreter shebang; reports the same MIME type as the other Tcl rules.
0 search/1/w #!\ /usr/bin/tcl Tcl script text executable
!:mime text/x-tcl
0 search/1/w #!\ /usr/local/bin/tcl Tcl script text executable
!:mime text/x-tcl
0 search/1 #!/usr/bin/env\ tcl Tcl script text executable
!:mime text/x-tcl
0 search/1 #!\ /usr/bin/env\ tcl Tcl script text executable
!:mime text/x-tcl
0 search/1/w #!\ /usr/bin/wish Tcl/Tk script text executable
!:mime text/x-tcl
0 search/1/w #!\ /usr/local/bin/wish Tcl/Tk script text executable
!:mime text/x-tcl
0 search/1 #!/usr/bin/env\ wish Tcl/Tk script text executable
!:mime text/x-tcl
0 search/1 #!\ /usr/bin/env\ wish Tcl/Tk script text executable
!:mime text/x-tcl

View file

@ -1,56 +0,0 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File: tex,v 1.17 2010/09/20 19:19:17 rrt Exp $
# tex: file(1) magic for TeX files
#
# XXX - needs byte-endian stuff (big-endian and little-endian DVI?)
#
# From <conklin@talisman.kaleida.com>
# Although we may know the offset of certain text fields in TeX DVI
# and font files, we can't use them reliably because they are not
# zero terminated. [but we do anyway, christos]
0 string \367\002 TeX DVI file
!:mime application/x-dvi
# There is no way to detect TeX Font Metric (*.tfm) files without
# breaking them apart and reading the data. The following patterns
# match most *.tfm files generated by METAFONT or afm2tfm.
2 string \000\021 TeX font metric data
!:mime application/x-tex-tfm
2 string \000\022 TeX font metric data
!:mime application/x-tex-tfm
# Texinfo and GNU Info, from Daniel Quinlan (quinlan@yggdrasil.com)
0 search/1 \\input\ texinfo Texinfo source text
!:mime text/x-texinfo
0 search/1 This\ is\ Info\ file GNU Info text
!:mime text/x-info
# TeX documents, from Daniel Quinlan (quinlan@yggdrasil.com)
0 search/4096 \\input TeX document text
!:mime text/x-tex
!:strength + 15
0 search/4096 \\section LaTeX document text
!:mime text/x-tex
!:strength + 18
0 search/4096 \\setlength LaTeX document text
!:mime text/x-tex
!:strength + 15
0 search/4096 \\documentstyle LaTeX document text
!:mime text/x-tex
!:strength + 18
0 search/4096 \\chapter LaTeX document text
!:mime text/x-tex
!:strength + 18
0 search/4096 \\documentclass LaTeX 2e document text
!:mime text/x-tex
!:strength + 15
0 search/4096 \\relax LaTeX auxiliary file
!:mime text/x-tex
!:strength + 15
0 search/4096 \\contentsline LaTeX table of contents
!:mime text/x-tex
!:strength + 15
0 search/4096 %\ -*-latex-*- LaTeX document text
!:mime text/x-tex

View file

@ -1,22 +0,0 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File$
# troff: file(1) magic for *roff
#
# updated by Daniel Quinlan (quinlan@yggdrasil.com)
# troff input
0 search/1 .\\" troff or preprocessor input text
!:mime text/troff
0 search/1 '\\" troff or preprocessor input text
!:mime text/troff
0 search/1 '.\\" troff or preprocessor input text
!:mime text/troff
0 search/1 \\" troff or preprocessor input text
!:mime text/troff
0 search/1 ''' troff or preprocessor input text
!:mime text/troff
0 regex/20 \^\\.[A-Za-z0-9][A-Za-z0-9][\ \t] troff or preprocessor input text
!:mime text/troff
0 regex/20 \^\\.[A-Za-z0-9][A-Za-z0-9]$ troff or preprocessor input text
!:mime text/troff

View file

@ -1,26 +0,0 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File$
# vorbis: file(1) magic for Ogg/Vorbis files
#
# From Felix von Leitner <leitner@fefe.de>
# Extended by Beni Cherniavsky <cben@crosswinds.net>
# Further extended by Greg Wooledge <greg@wooledge.org>
#
# Most (everything but the number of channels and bitrate) is commented
# out with `##' as it's not interesting to the average user. The most
# probable things advanced users would want to uncomment are probably
# the number of comments and the encoder version.
#
# FIXME: The first match has been made a search, so that it can skip
# over prepended ID3 tags. This will work for MIME type detection, but
# won't work for detecting other properties of the file (they all need
# to be made relative to the search). In any case, if the file has ID3
# tags, the ID3 information will be printed, not the Ogg information,
# so until that's fixed, this doesn't matter.
# FIXME[2]: Disable the above for now, since search assumes text mode.
#
# --- Ogg Framing ---
#0 search/1000 OggS Ogg data
0 string OggS Ogg data
!:mime application/ogg

View file

@ -1,14 +0,0 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File: warc,v 1.2 2009/09/19 16:28:13 christos Exp $
# warc: file(1) magic for WARC files
0 string WARC/ WARC Archive
>5 string x version %.4s
!:mime application/warc
#------------------------------------------------------------------------------
# Arc File Format from Internet Archive
# see http://www.archive.org/web/researcher/ArcFileFormat.php
0 string filedesc:// Internet Archive File
!:mime application/x-ia-arc

View file

@ -1,19 +0,0 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File: windows,v 1.4 2009/09/19 16:28:13 christos Exp $
# windows: file(1) magic for Microsoft Windows
#
# This file is mainly reserved for files where programs
# using them are run almost always on MS Windows 3.x or
# above, or files only used exclusively in Windows OS,
# where there is no better category to allocate for.
# For example, even though WinZIP almost run on Windows
# only, it is better to treat them as "archive" instead.
# For format usable in DOS, such as generic executable
# format, please specify under "msdos" file.
#
# From: Pal Tamas <folti@balabit.hu>
# Autorun File
0 string/c [autorun]\r\n Microsoft Windows Autorun file.
# No trailing period here: the MIME type string is emitted verbatim.
!:mime application/x-setupscript

View file

@ -1,43 +0,0 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File: wordprocessors,v 1.16 2012/10/29 17:36:49 christos Exp $
# wordprocessors: file(1) magic for word processors.
#
# Hangul (Korean) Word Processor File
# From: Won-Kyu Park <wkpark@kldp.org>
512 string R\0o\0o\0t\0 Hangul (Korean) Word Processor File 2000
!:mime application/x-hwp
# Quark Express from http://www.garykessler.net/library/file_sigs.html
2 string MMXPR3 Motorola Quark Express Document (English)
!:mime application/x-quark-xpress-3
#------------------------------------------------------------------------------
# ichitaro456: file(1) magic for Just System Word Processor Ichitaro
#
# Contributor kenzo-:
# Reversed-engineered JS Ichitaro magic numbers
#
0 string DOC
>43 byte 0x14 Just System Word Processor Ichitaro v4
!:mime application/x-ichitaro4
0 string DOC
>43 byte 0x15 Just System Word Processor Ichitaro v5
!:mime application/x-ichitaro5
0 string DOC
>43 byte 0x16 Just System Word Processor Ichitaro v6
!:mime application/x-ichitaro6
# Type: Freemind mindmap documents
# From: Jamie Thompson <debian-bugs@jamie-thompson.co.uk>
0 string/w \<map\ version Freemind document
!:mime application/x-freemind
# Type: Scribus
# From: Werner Fink <werner@suse.de>
0 string \<SCRIBUSUTF8NEW\ Version Scribus Document
!:mime application/x-scribus

View file

@ -1,11 +0,0 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File: xwindows,v 1.7 2011/05/03 01:44:17 christos Exp $
# xwindows: file(1) magic for various X/Window system file formats.
# Xcursor data
# X11 mouse cursor format defined in libXcursor, see
# http://www.x.org/archive/X11R6.8.1/doc/Xcursor.3.html
# http://cgit.freedesktop.org/xorg/lib/libXcursor/tree/include/X11/Xcursor/Xcursor.h
0 string Xcur Xcursor data
!:mime image/x-xcursor

View file

@ -0,0 +1 @@
@load ./main

View file

@ -0,0 +1,38 @@
@load base/frameworks/files
@load base/utils/paths
module FileExtract;
export {
## The prefix where files are extracted to.
const prefix = "./extract_files/" &redef;
redef record Files::Info += {
## Local filename of the extracted file, as it was before the
## extraction prefix is applied.
extracted: string &optional &log;
};
redef record Files::AnalyzerArgs += {
## The local filename to which to write an extracted file.
## This field is used in the core by the extraction plugin
## to know where to write the file to. It's also optional; when
## unset, a name is derived from the file's source and id at the
## time the extraction analyzer is added.
extract_filename: string &optional;
};
}
function on_add(f: fa_file, args: Files::AnalyzerArgs)
	{
	# Use the caller-supplied name if there is one, otherwise build
	# "extract-<source>-<id>" from the file itself.
	local fname = args?$extract_filename ?
	              args$extract_filename :
	              cat("extract-", f$source, "-", f$id);

	# The log records the bare name; the analyzer arguments get the name
	# joined onto the extraction prefix.
	f$info$extracted = fname;
	args$extract_filename = build_path_compressed(prefix, fname);
	}
event bro_init() &priority=10
	{
	# Make sure the extraction directory exists.
	mkdir(prefix);

	# Have on_add run whenever the extraction analyzer is added to a file.
	Files::register_analyzer_add_callback(Files::ANALYZER_EXTRACT, on_add);
	}

View file

@ -0,0 +1 @@
@load ./main

View file

@ -0,0 +1,32 @@
@load base/frameworks/files
module FileHash;
export {
# Extend the file log record with one optional field per digest kind.
# Each field is filled in by this module's file_hash handler when a
# hash of the matching kind is reported.
redef record Files::Info += {
## An MD5 digest of the file contents.
md5: string &log &optional;
## A SHA1 digest of the file contents.
sha1: string &log &optional;
## A SHA256 digest of the file contents.
sha256: string &log &optional;
};
}
event file_hash(f: fa_file, kind: string, hash: string) &priority=5
	{
	# Store the digest in the Files::Info field matching its kind.
	# Any kind other than md5/sha1/sha256 is ignored.
	if ( kind == "md5" )
		f$info$md5 = hash;
	else if ( kind == "sha1" )
		f$info$sha1 = hash;
	else if ( kind == "sha256" )
		f$info$sha256 = hash;
	}

View file

@ -81,6 +81,13 @@ export {
## Returns: The analyzer name corresponding to the tag. ## Returns: The analyzer name corresponding to the tag.
global name: function(tag: Analyzer::Tag) : string; global name: function(tag: Analyzer::Tag) : string;
## Translates an analyzer's name to a tag enum value.
##
## name: The analyzer name.
##
## Returns: The analyzer tag corresponding to the name.
global get_tag: function(name: string): Analyzer::Tag;
## Schedules an analyzer for a future connection originating from a given IP ## Schedules an analyzer for a future connection originating from a given IP
## address and port. ## address and port.
## ##
@ -187,6 +194,11 @@ function name(atype: Analyzer::Tag) : string
return __name(atype); return __name(atype);
} }
# Implementation of the exported get_tag: passes the analyzer name to
# __tag and returns its result unchanged.
function get_tag(name: string): Analyzer::Tag
{
return __tag(name);
}
function schedule_analyzer(orig: addr, resp: addr, resp_p: port, function schedule_analyzer(orig: addr, resp: addr, resp_p: port,
analyzer: Analyzer::Tag, tout: interval) : bool analyzer: Analyzer::Tag, tout: interval) : bool
{ {

View file

@ -1,261 +0,0 @@
##! An interface for driving the analysis of files, possibly independent of
##! any network protocol over which they're transported.
@load base/bif/file_analysis.bif
@load base/frameworks/logging
module FileAnalysis;
export {
redef enum Log::ID += {
## Logging stream for file analysis.
LOG
};
## A structure which represents a desired type of file analysis.
type AnalyzerArgs: record {
## The type of analysis.
tag: FileAnalysis::Tag;
## The local filename to which to write an extracted file. Must be
## set when *tag* is :bro:see:`FileAnalysis::ANALYZER_EXTRACT`.
extract_filename: string &optional;
## An event which will be generated for all new file contents,
## chunk-wise. Used when *tag* is
## :bro:see:`FileAnalysis::ANALYZER_DATA_EVENT`.
chunk_event: event(f: fa_file, data: string, off: count) &optional;
## An event which will be generated for all new file contents,
## stream-wise. Used when *tag* is
## :bro:see:`FileAnalysis::ANALYZER_DATA_EVENT`.
stream_event: event(f: fa_file, data: string) &optional;
} &redef;
## Contains all metadata related to the analysis of a given file.
## For the most part, fields here are derived from ones of the same name
## in :bro:see:`fa_file`.
type Info: record {
## An identifier associated with a single file.
id: string &log;
## Identifier associated with a container file from which this one was
## extracted as part of the file analysis.
parent_id: string &log &optional;
## An identification of the source of the file data. E.g. it may be
## a network protocol over which it was transferred, or a local file
## path which was read, or some other input source.
source: string &log &optional;
## If the source of this file is a network connection, this field
## may be set to indicate the directionality.
is_orig: bool &log &optional;
## The time at which the last activity for the file was seen.
last_active: time &log;
## Number of bytes provided to the file analysis engine for the file.
seen_bytes: count &log &default=0;
## Total number of bytes that are supposed to comprise the full file.
total_bytes: count &log &optional;
## The number of bytes in the file stream that were completely missed
## during the process of analysis e.g. due to dropped packets.
missing_bytes: count &log &default=0;
## The number of not all-in-sequence bytes in the file stream that
## were delivered to file analyzers due to reassembly buffer overflow.
overflow_bytes: count &log &default=0;
## The amount of time between receiving new data for this file that
## the analysis engine will wait before giving up on it.
timeout_interval: interval &log &optional;
## The number of bytes at the beginning of a file to save for later
## inspection in *bof_buffer* field.
bof_buffer_size: count &log &optional;
## A mime type provided by libmagic against the *bof_buffer*, or
## in the cases where no buffering of the beginning of file occurs,
## an initial guess of the mime type based on the first data seen.
mime_type: string &log &optional;
## Whether the file analysis timed out at least once for the file.
timedout: bool &log &default=F;
## Connection UIDS over which the file was transferred.
conn_uids: set[string] &log;
## A set of analysis types done during the file analysis.
analyzers: set[FileAnalysis::Tag];
## Local filenames of extracted files.
extracted_files: set[string] &log;
## An MD5 digest of the file contents.
md5: string &log &optional;
## A SHA1 digest of the file contents.
sha1: string &log &optional;
## A SHA256 digest of the file contents.
sha256: string &log &optional;
} &redef;
## A table that can be used to disable file analysis completely for
## any files transferred over given network protocol analyzers.
const disable: table[Analyzer::Tag] of bool = table() &redef;
## Event that can be handled to access the Info record as it is sent on
## to the logging framework.
global log_file_analysis: event(rec: Info);
## The salt concatenated to unique file handle strings generated by
## :bro:see:`get_file_handle` before hashing them in to a file id
## (the *id* field of :bro:see:`fa_file`).
## Provided to help mitigate the possibility of manipulating parts of
## network connections that factor in to the file handle in order to
## generate two handles that would hash to the same file id.
const salt = "I recommend changing this." &redef;
## Sets the *timeout_interval* field of :bro:see:`fa_file`, which is
## used to determine the length of inactivity that is allowed for a file
## before internal state related to it is cleaned up. When used within a
## :bro:see:`file_timeout` handler, the analysis will delay timing out
## again for the period specified by *t*.
##
## f: the file.
##
## t: the amount of time the file can remain inactive before discarding.
##
## Returns: true if the timeout interval was set, or false if analysis
## for the *id* isn't currently active.
global set_timeout_interval: function(f: fa_file, t: interval): bool;
## Adds an analyzer to the analysis of a given file.
##
## f: the file.
##
## args: the analyzer type to add along with any arguments it takes.
##
## Returns: true if the analyzer will be added, or false if analysis
## for the *id* isn't currently active or the *args*
## were invalid for the analyzer type.
global add_analyzer: function(f: fa_file, args: AnalyzerArgs): bool;
## Removes an analyzer from the analysis of a given file.
##
## f: the file.
##
## args: the analyzer (type and args) to remove.
##
## Returns: true if the analyzer will be removed, or false if analysis
## for the *id* isn't currently active.
global remove_analyzer: function(f: fa_file, args: AnalyzerArgs): bool;
## Stops/ignores any further analysis of a given file.
##
## f: the file.
##
## Returns: true if analysis for the given file will be ignored for the
## rest of its contents, or false if analysis for the *id*
## isn't currently active.
global stop: function(f: fa_file): bool;
}
# Give every fa_file an optional slot for its log record; set_info
# creates and refreshes it.
redef record fa_file += {
info: Info &optional;
};
function set_info(f: fa_file)
	{
	# Create the Info record on first use.
	if ( ! f?$info )
		{
		local fresh: Info;
		f$info = fresh;
		}

	# Records are references, so writes through this alias land in f$info.
	local info = f$info;

	# Mirror the current fa_file state into the log record. Optional
	# fa_file fields are copied only when they are set.
	info$id = f$id;

	if ( f?$parent_id )
		info$parent_id = f$parent_id;

	if ( f?$source )
		info$source = f$source;

	if ( f?$is_orig )
		info$is_orig = f$is_orig;

	info$last_active = f$last_active;
	info$seen_bytes = f$seen_bytes;

	if ( f?$total_bytes )
		info$total_bytes = f$total_bytes;

	info$missing_bytes = f$missing_bytes;
	info$overflow_bytes = f$overflow_bytes;
	info$timeout_interval = f$timeout_interval;
	info$bof_buffer_size = f$bof_buffer_size;

	if ( f?$mime_type )
		info$mime_type = f$mime_type;

	# Collect the UID of every connection the file was seen on.
	if ( f?$conns )
		{
		for ( cid in f$conns )
			add info$conn_uids[f$conns[cid]$uid];
		}
	}
# Implementation of the exported set_timeout_interval: forwards the
# file's id and the interval to __set_timeout_interval and returns its result.
function set_timeout_interval(f: fa_file, t: interval): bool
{
return __set_timeout_interval(f$id, t);
}
function add_analyzer(f: fa_file, args: AnalyzerArgs): bool
	{
	# Only touch the log record once __add_analyzer has accepted the request.
	if ( __add_analyzer(f$id, args) )
		{
		set_info(f);
		add f$info$analyzers[args$tag];

		# For extraction, also log where the file is being written.
		if ( args$tag == FileAnalysis::ANALYZER_EXTRACT )
			add f$info$extracted_files[args$extract_filename];

		return T;
		}

	return F;
	}
# Implementation of the exported remove_analyzer: forwards the file's id
# and the analyzer arguments to __remove_analyzer and returns its result.
# The log record in f$info is not modified here.
function remove_analyzer(f: fa_file, args: AnalyzerArgs): bool
{
return __remove_analyzer(f$id, args);
}
# Implementation of the exported stop: forwards the file's id to __stop
# and returns its result.
function stop(f: fa_file): bool
{
return __stop(f$id);
}
# Create the file analysis log stream: Info supplies the columns and
# log_file_analysis is the event associated with the stream.
event bro_init() &priority=5
{
Log::create_stream(FileAnalysis::LOG,
[$columns=Info, $ev=log_file_analysis]);
}
# Mark the file's log record as having timed out.
event file_timeout(f: fa_file) &priority=5
{
# Refresh (or create) f$info first so the flag is set on an existing record.
set_info(f);
f$info$timedout = T;
}
event file_hash(f: fa_file, kind: string, hash: string) &priority=5
	{
	# Make sure f$info exists and is current before writing into it.
	set_info(f);

	# Store the digest in the field matching its kind.
	# Any kind other than md5/sha1/sha256 is ignored.
	if ( kind == "md5" )
		f$info$md5 = hash;
	else if ( kind == "sha1" )
		f$info$sha1 = hash;
	else if ( kind == "sha256" )
		f$info$sha256 = hash;
	}
# Take a final snapshot of the file's state into f$info. The
# priority -5 handler for the same event writes that record to the log.
event file_state_remove(f: fa_file) &priority=5
{
set_info(f);
}
# Write the file's record to the log. The priority 5 handler for this
# event calls set_info, so f$info is populated by the time this runs.
event file_state_remove(f: fa_file) &priority=-5
{
Log::write(FileAnalysis::LOG, f$info);
}

View file

@ -0,0 +1,371 @@
##! An interface for driving the analysis of files, possibly independent of
##! any network protocol over which they're transported.
@load base/bif/file_analysis.bif
@load base/frameworks/analyzer
@load base/frameworks/logging
@load base/utils/site
module Files;
export {
redef enum Log::ID += {
## Logging stream for file analysis.
LOG
};
## A structure which represents a desired type of file analysis.
type AnalyzerArgs: record {
## An event which will be generated for all new file contents,
## chunk-wise. Used when *tag* is
## :bro:see:`Files::ANALYZER_DATA_EVENT`.
chunk_event: event(f: fa_file, data: string, off: count) &optional;
## An event which will be generated for all new file contents,
## stream-wise. Used when *tag* is
## :bro:see:`Files::ANALYZER_DATA_EVENT`.
stream_event: event(f: fa_file, data: string) &optional;
} &redef;
## Contains all metadata related to the analysis of a given file.
## For the most part, fields here are derived from ones of the same name
## in :bro:see:`fa_file`.
type Info: record {
## The time when the file was first seen.
ts: time &log;
## An identifier associated with a single file.
fuid: string &log;
## If this file was transferred over a network
## connection this should show the host or hosts that
## the data sourced from.
tx_hosts: set[addr] &log;
## If this file was transferred over a network
## connection this should show the host or hosts that
## the data traveled to.
rx_hosts: set[addr] &log;
## Connection UIDS over which the file was transferred.
conn_uids: set[string] &log;
## An identification of the source of the file data. E.g. it may be
## a network protocol over which it was transferred, or a local file
## path which was read, or some other input source.
source: string &log &optional;
## A value to represent the depth of this file in relation
## to its source. In SMTP, it is the depth of the MIME
## attachment on the message. In HTTP, it is the depth of the
## request within the TCP connection.
depth: count &default=0 &log;
## A set of analysis types done during the file analysis.
analyzers: set[string] &log;
## A mime type provided by libmagic against the *bof_buffer*, or
## in the cases where no buffering of the beginning of file occurs,
## an initial guess of the mime type based on the first data seen.
mime_type: string &log &optional;
## A filename for the file if one is available from the source
## for the file. These will frequently come from
## "Content-Disposition" headers in network protocols.
filename: string &log &optional;
## The duration the file was analyzed for.
duration: interval &log &default=0secs;
## If the source of this file is a network connection, this field
## indicates if the data originated from the local network or not as
## determined by the configured :bro:see:`Site::local_nets`.
local_orig: bool &log &optional;
## If the source of this file is a network connection, this field
## indicates if the file is being sent by the originator of the connection
## or the responder.
is_orig: bool &log &optional;
## Number of bytes provided to the file analysis engine for the file.
seen_bytes: count &log &default=0;
## Total number of bytes that are supposed to comprise the full file.
total_bytes: count &log &optional;
## The number of bytes in the file stream that were completely missed
## during the process of analysis e.g. due to dropped packets.
missing_bytes: count &log &default=0;
## The number of not all-in-sequence bytes in the file stream that
## were delivered to file analyzers due to reassembly buffer overflow.
overflow_bytes: count &log &default=0;
## Whether the file analysis timed out at least once for the file.
timedout: bool &log &default=F;
## Identifier associated with a container file from which this one was
## extracted as part of the file analysis.
parent_fuid: string &log &optional;
} &redef;
## A table that can be used to disable file analysis completely for
## any files transferred over given network protocol analyzers.
const disable: table[Files::Tag] of bool = table() &redef;
## The salt concatenated to unique file handle strings generated by
## :bro:see:`get_file_handle` before hashing them in to a file id
## (the *id* field of :bro:see:`fa_file`).
## Provided to help mitigate the possibility of manipulating parts of
## network connections that factor in to the file handle in order to
## generate two handles that would hash to the same file id.
const salt = "I recommend changing this." &redef;
## Sets the *timeout_interval* field of :bro:see:`fa_file`, which is
## used to determine the length of inactivity that is allowed for a file
## before internal state related to it is cleaned up. When used within a
## :bro:see:`file_timeout` handler, the analysis will delay timing out
## again for the period specified by *t*.
##
## f: the file.
##
## t: the amount of time the file can remain inactive before discarding.
##
## Returns: true if the timeout interval was set, or false if analysis
## for the *id* isn't currently active.
global set_timeout_interval: function(f: fa_file, t: interval): bool;
## Adds an analyzer to the analysis of a given file.
##
## f: the file.
##
## tag: the analyzer type.
##
## args: any parameters the analyzer takes.
##
## Returns: true if the analyzer will be added, or false if analysis
## for the *id* isn't currently active or the *args*
## were invalid for the analyzer type.
global add_analyzer: function(f: fa_file,
tag: Files::Tag,
args: AnalyzerArgs &default=AnalyzerArgs()): bool;
## Removes an analyzer from the analysis of a given file.
##
## f: the file.
##
## tag: the analyzer type.
##
## args: the arguments of the analyzer to remove.
##
## Returns: true if the analyzer will be removed, or false if analysis
## for the *id* isn't currently active.
global remove_analyzer: function(f: fa_file,
tag: Files::Tag,
args: AnalyzerArgs &default=AnalyzerArgs()): bool;
## Stops/ignores any further analysis of a given file.
##
## f: the file.
##
## Returns: true if analysis for the given file will be ignored for the
## rest of its contents, or false if analysis for the *id*
## isn't currently active.
global stop: function(f: fa_file): bool;
## Translates a file analyzer enum value to a string with the analyzer's name.
##
## tag: The analyzer tag.
##
## Returns: The analyzer name corresponding to the tag.
global analyzer_name: function(tag: Files::Tag): string;
## Provides a text description regarding metadata of the file.
## For example, with HTTP it would return a URL.
##
## f: The file to be described.
##
## Returns: a text description regarding metadata of the file.
global describe: function(f: fa_file): string;
## The callbacks a protocol hands to :bro:see:`Files::register_protocol`
## so that the Files framework can work with files carried by it.
type ProtoRegistration: record {
## A callback to generate a file handle on demand when
## one is needed by the core.
get_file_handle: function(c: connection, is_orig: bool): string;
## A callback to "describe" a file. In the case of an HTTP
## transfer the most obvious description would be the URL.
## It's like an extremely compressed version of the normal log.
## If a protocol does not supply one, the default returns an
## empty string.
describe: function(f: fa_file): string
&default=function(f: fa_file): string { return ""; };
};
## Register callbacks for protocols that work with the Files framework.
## The callbacks must uniquely identify a file and each protocol can
## only have a single callback registered for it.
##
## tag: Tag for the protocol analyzer having a callback being registered.
##
## reg: A :bro:see:`ProtoRegistration` record.
##
## Returns: true if the protocol being registered was not previously registered.
global register_protocol: function(tag: Analyzer::Tag, reg: ProtoRegistration): bool;
## Register a callback for file analyzers to use if they need to do some manipulation
## when they are being added to a file before the core code takes over. This is
## unlikely to be interesting for users and should only be called by file analyzer
## authors, but it is *not required*.
##
## tag: Tag for the file analyzer.
##
## callback: Function to execute when the given file analyzer is being added.
global register_analyzer_add_callback: function(tag: Files::Tag, callback: function(f: fa_file, args: AnalyzerArgs));
## Event that can be handled to access the Info record as it is sent on
## to the logging framework.
global log_files: event(rec: Info);
}
# The log record for a file travels with the fa_file itself; it is created
# and refreshed by set_info().
redef record fa_file += {
info: Info &optional;
};
redef record AnalyzerArgs += {
# This is used internally for the core file analyzer api.
tag: Files::Tag &optional;
};
# Store the callbacks for protocol analyzers that have files.
global registered_protocols: table[Analyzer::Tag] of ProtoRegistration = table();
# Per file-analyzer callbacks that add_analyzer() invokes before handing the
# analyzer over to the core.
global analyzer_add_callbacks: table[Files::Tag] of function(f: fa_file, args: AnalyzerArgs) = table();
# Creates the Files::LOG stream with Info as its column set and log_files as
# the event raised for each record.
event bro_init() &priority=5
{
Log::create_stream(Files::LOG, [$columns=Info, $ev=log_files]);
}
# Makes sure *f* carries an Info record and brings that record up to date
# with the current contents of the fa_file.
#
# f: the file whose log record is created/refreshed.
function set_info(f: fa_file)
	{
	# Lazily create the log record the first time this file is seen; the
	# timestamp is the file's last activity at that moment.
	if ( ! f?$info )
		f$info = Info($ts=f$last_active, $fuid=f$id);

	# Values that are read unconditionally from the fa_file.
	f$info$duration = f$last_active - f$info$ts;
	f$info$seen_bytes = f$seen_bytes;
	f$info$missing_bytes = f$missing_bytes;
	f$info$overflow_bytes = f$overflow_bytes;

	# Values that are only copied over when they are present on the file.
	if ( f?$parent_id )
		f$info$parent_fuid = f$parent_id;

	if ( f?$source )
		f$info$source = f$source;

	if ( f?$total_bytes )
		f$info$total_bytes = f$total_bytes;

	if ( f?$is_orig )
		f$info$is_orig = f$is_orig;

	if ( f?$mime_type )
		f$info$mime_type = f$mime_type;
	}
# Script-level wrapper: forwards the file's id and the new interval to the
# internal __set_timeout_interval function and returns its result.
function set_timeout_interval(f: fa_file, t: interval): bool
	{
	local accepted = __set_timeout_interval(f$id, t);
	return accepted;
	}
# Attaches the file analyzer identified by *tag* to *f*.
#
# f: the file.
#
# tag: the analyzer type.
#
# args: any parameters the analyzer takes; its *tag* field is overwritten.
#
# Returns: T if __add_analyzer accepted the analyzer, F (after a reporter
#          warning) otherwise.
function add_analyzer(f: fa_file, tag: Files::Tag, args: AnalyzerArgs): bool
	{
	# The core API takes the analyzer type as part of the args record.
	args$tag = tag;

	# NOTE(review): the analyzer name is recorded before __add_analyzer
	# reports success and f$info is used without an f?$info check --
	# confirm both are intended.
	add f$info$analyzers[Files::analyzer_name(tag)];

	# Give the analyzer's author a chance to adjust things first.
	if ( tag in analyzer_add_callbacks )
		analyzer_add_callbacks[tag](f, args);

	if ( __add_analyzer(f$id, args) )
		return T;

	Reporter::warning(fmt("Analyzer %s not added successfully to file %s.", tag, f$id));
	return F;
	}
# Stores *callback* as the add-callback for file analyzer *tag*. Any callback
# previously stored for the same tag is replaced.
function register_analyzer_add_callback(tag: Files::Tag, callback: function(f: fa_file, args: AnalyzerArgs))
{
analyzer_add_callbacks[tag] = callback;
}
# Detaches the file analyzer identified by *tag* from *f*.
#
# Returns: whatever the internal __remove_analyzer function reports.
function remove_analyzer(f: fa_file, tag: Files::Tag, args: AnalyzerArgs): bool
	{
	# As in add_analyzer, the core API reads the type from the args record.
	args$tag = tag;

	local removed = __remove_analyzer(f$id, args);
	return removed;
	}
# Script-level wrapper: passes the file's id to the internal __stop function
# and returns its result.
function stop(f: fa_file): bool
{
return __stop(f$id);
}
# Script-level wrapper: returns what the internal __analyzer_name function
# yields for *tag*.
function analyzer_name(tag: Files::Tag): string
{
return __analyzer_name(tag);
}
# Runs at priority 10 to create/refresh f$info via set_info().
# NOTE(review): presumably the high priority is so that lower-priority
# handlers can rely on f$info -- confirm.
event file_new(f: fa_file) &priority=10
{
set_info(f);
}
# Records the connection a file was seen on in the file's log record: the
# connection uid, the transmitting and receiving hosts and, if local networks
# are configured, whether the transmitter is local.
event file_over_new_connection(f: fa_file, c: connection, is_orig: bool) &priority=10
	{
	set_info(f);
	add f$info$conn_uids[c$uid];

	# Work out direction once; it is taken from the file record.
	local sender: addr;
	local receiver: addr;

	if ( f$is_orig )
		{
		sender = c$id$orig_h;
		receiver = c$id$resp_h;
		}
	else
		{
		sender = c$id$resp_h;
		receiver = c$id$orig_h;
		}

	add f$info$tx_hosts[sender];

	# local_orig is left unset when no local networks are configured.
	if ( |Site::local_nets| > 0 )
		f$info$local_orig = Site::is_local_addr(sender);

	add f$info$rx_hosts[receiver];
	}
# Refreshes the log record and flags that analysis of the file timed out.
# The flag is never cleared here, so it stays set once a timeout occurred.
event file_timeout(f: fa_file) &priority=10
{
set_info(f);
f$info$timedout = T;
}
# First pass (priority 10): bring the log record up to date one last time.
event file_state_remove(f: fa_file) &priority=10
{
set_info(f);
}
# Last pass (priority -10): write the record to Files::LOG. Running at a
# negative priority lets handlers in between still modify f$info.
event file_state_remove(f: fa_file) &priority=-10
{
Log::write(Files::LOG, f$info);
}
# Stores *reg* as the registration for protocol analyzer *tag*.
#
# Returns: T if *tag* had no registration before this call, F otherwise.
#          In both cases *reg* becomes the stored registration.
function register_protocol(tag: Analyzer::Tag, reg: ProtoRegistration): bool
	{
	local is_new = T;

	if ( tag in registered_protocols )
		is_new = F;

	registered_protocols[tag] = reg;
	return is_new;
	}
# Produces a short description of *f* by delegating to the *describe*
# callback of the protocol registered for the file's source.
#
# Returns: the callback's result, or an empty string when no protocol is
#          registered for the tag derived from f$source.
function describe(f: fa_file): string
	{
	local tag = Analyzer::get_tag(f$source);

	if ( tag in registered_protocols )
		{
		local reg = registered_protocols[tag];
		return reg$describe(f);
		}

	return "";
	}
# Answers the core's request for a file handle by calling the
# *get_file_handle* callback registered for *tag*; does nothing for
# protocols without a registration.
event get_file_handle(tag: Analyzer::Tag, c: connection, is_orig: bool) &priority=5
	{
	if ( tag in registered_protocols )
		{
		local reg = registered_protocols[tag];
		set_file_handle(reg$get_file_handle(c, is_orig));
		}
	}

View file

@ -68,6 +68,25 @@ export {
## the notice policy. ## the notice policy.
iconn: icmp_conn &optional; iconn: icmp_conn &optional;
## A file record if the notice is related to a file. The
## reference to the actual fa_file record will be deleted after applying
## the notice policy.
f: fa_file &optional;
## A file unique ID if this notice is related to a file. If the $f
## field is provided, this will be automatically filled out.
fuid: string &log &optional;
## A mime type if the notice is related to a file. If the $f field
## is provided, this will be automatically filled out.
file_mime_type: string &log &optional;
## Frequently files can be "described" to give a bit more context.
## This field will typically be automatically filled out from an
## fa_file record. For example, if a notice was related to a
## file over HTTP, the URL of the request would be shown.
file_desc: string &log &optional;
## The transport protocol. Filled automatically when either conn, iconn ## The transport protocol. Filled automatically when either conn, iconn
## or p is specified. ## or p is specified.
proto: transport_proto &log &optional; proto: transport_proto &log &optional;
@ -460,10 +479,28 @@ function apply_policy(n: Notice::Info)
if ( ! n?$ts ) if ( ! n?$ts )
n$ts = network_time(); n$ts = network_time();
if ( n?$f )
{
if ( ! n?$fuid )
n$fuid = n$f$id;
if ( ! n?$file_mime_type && n$f?$mime_type )
n$file_mime_type = n$f$mime_type;
n$file_desc = Files::describe(n$f);
if ( n$f?$conns && |n$f$conns| == 1 )
{
for ( id in n$f$conns )
n$conn = n$f$conns[id];
}
}
if ( n?$conn ) if ( n?$conn )
{ {
if ( ! n?$id ) if ( ! n?$id )
n$id = n$conn$id; n$id = n$conn$id;
if ( ! n?$uid ) if ( ! n?$uid )
n$uid = n$conn$uid; n$uid = n$conn$uid;
} }
@ -513,13 +550,15 @@ function apply_policy(n: Notice::Info)
if ( ! n?$suppress_for ) if ( ! n?$suppress_for )
n$suppress_for = default_suppression_interval; n$suppress_for = default_suppression_interval;
# Delete the connection record if it's there so we aren't sending that # Delete the connection and file records if they're there so we
# to remote machines. It can cause problems due to the size of the # aren't sending that to remote machines. It can cause problems
# connection record. # due to the size of those records.
if ( n?$conn ) if ( n?$conn )
delete n$conn; delete n$conn;
if ( n?$iconn ) if ( n?$iconn )
delete n$iconn; delete n$iconn;
if ( n?$f )
delete n$f;
} }
function internal_NOTICE(n: Notice::Info) function internal_NOTICE(n: Notice::Info)

View file

@ -328,7 +328,7 @@ type fa_file: record {
## An identification of the source of the file data. E.g. it may be ## An identification of the source of the file data. E.g. it may be
## a network protocol over which it was transferred, or a local file ## a network protocol over which it was transferred, or a local file
## path which was read, or some other input source. ## path which was read, or some other input source.
source: string &optional; source: string;
## If the source of this file is is a network connection, this field ## If the source of this file is is a network connection, this field
## may be set to indicate the directionality. ## may be set to indicate the directionality.
@ -3050,6 +3050,6 @@ const snaplen = 8192 &redef;
@load base/frameworks/logging @load base/frameworks/logging
@load base/frameworks/input @load base/frameworks/input
@load base/frameworks/analyzer @load base/frameworks/analyzer
@load base/frameworks/file-analysis @load base/frameworks/files
@load base/bif @load base/bif

View file

@ -49,4 +49,7 @@
@load base/protocols/syslog @load base/protocols/syslog
@load base/protocols/tunnels @load base/protocols/tunnels
@load base/files/hash
@load base/files/extract
@load base/misc/find-checksum-offloading @load base/misc/find-checksum-offloading

View file

@ -1,7 +1,7 @@
@load ./utils-commands @load ./utils-commands
@load ./main @load ./main
@load ./file-analysis @load ./utils
@load ./file-extract @load ./files
@load ./gridftp @load ./gridftp
@load-sigs ./dpd.sig @load-sigs ./dpd.sig

View file

@ -1,48 +0,0 @@
@load ./main
@load base/utils/conn-ids
@load base/frameworks/file-analysis/main
module FTP;
export {
## Default file handle provider for FTP.
global get_file_handle: function(c: connection, is_orig: bool): string;
}
function get_handle_string(c: connection): string
{
return cat(Analyzer::ANALYZER_FTP_DATA, " ", c$start_time, " ", id_string(c$id));
}
function get_file_handle(c: connection, is_orig: bool): string
{
if ( [c$id$resp_h, c$id$resp_p] !in ftp_data_expected ) return "";
local info: FTP::Info = ftp_data_expected[c$id$resp_h, c$id$resp_p];
if ( info$passive )
# FTP client initiates data channel.
if ( is_orig )
# Don't care about FTP client data.
return "";
else
# Do care about FTP server data.
return get_handle_string(c);
else
# FTP server initiates dta channel.
if ( is_orig )
# Do care about FTP server data.
return get_handle_string(c);
else
# Don't care about FTP client data.
return "";
}
module GLOBAL;
event get_file_handle(tag: Analyzer::Tag, c: connection, is_orig: bool)
&priority=5
{
if ( tag != Analyzer::ANALYZER_FTP_DATA ) return;
set_file_handle(FTP::get_file_handle(c, is_orig));
}

View file

@ -1,90 +0,0 @@
##! File extraction support for FTP.
@load ./main
@load base/utils/files
module FTP;
export {
## Pattern of file mime types to extract from FTP transfers.
const extract_file_types = /NO_DEFAULT/ &redef;
## The on-disk prefix for files to be extracted from FTP-data transfers.
const extraction_prefix = "ftp-item" &redef;
}
redef record Info += {
## On disk file where it was extracted to.
extraction_file: string &log &optional;
## Indicates if the current command/response pair should attempt to
## extract the file if a file was transferred.
extract_file: bool &default=F;
};
function get_extraction_name(f: fa_file): string
{
local r = fmt("%s-%s.dat", extraction_prefix, f$id);
return r;
}
event file_new(f: fa_file) &priority=5
{
if ( ! f?$source ) return;
if ( f$source != "FTP_DATA" ) return;
if ( f?$mime_type && extract_file_types in f$mime_type )
{
FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_EXTRACT,
$extract_filename=get_extraction_name(f)]);
return;
}
if ( ! f?$conns ) return;
for ( cid in f$conns )
{
local c: connection = f$conns[cid];
if ( [cid$resp_h, cid$resp_p] !in ftp_data_expected ) next;
local s = ftp_data_expected[cid$resp_h, cid$resp_p];
if ( ! s$extract_file ) next;
FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_EXTRACT,
$extract_filename=get_extraction_name(f)]);
return;
}
}
event file_state_remove(f: fa_file) &priority=4
{
if ( ! f?$source ) return;
if ( f$source != "FTP_DATA" ) return;
if ( ! f?$info ) return;
for ( filename in f$info$extracted_files )
{
local s: FTP::Info;
s$ts = network_time();
s$tags = set();
s$user = "<ftp-data>";
s$extraction_file = filename;
if ( f?$conns )
for ( cid in f$conns )
{
s$uid = f$conns[cid]$uid;
s$id = cid;
}
Log::write(FTP::LOG, s);
}
}
event log_ftp(rec: Info) &priority=-10
{
delete rec$extraction_file;
delete rec$extract_file;
}

View file

@ -0,0 +1,60 @@
@load ./main
@load ./utils
@load base/utils/conn-ids
@load base/frameworks/files
module FTP;
export {
redef record Info += {
## File unique ID of the file seen on the expected data channel
## belonging to this FTP session.
fuid: string &optional &log;
};
## Default file handle provider for FTP.
global get_file_handle: function(c: connection, is_orig: bool): string;
## Describe the file being transferred.
global describe_file: function(f: fa_file): string;
}
# Builds the file handle for an FTP data connection.
#
# Returns: a handle made of the FTP_DATA analyzer tag, the connection start
#          time, the connection id and the direction; an empty string when
#          the responder endpoint is not an expected FTP data channel.
function get_file_handle(c: connection, is_orig: bool): string
	{
	if ( [c$id$resp_h, c$id$resp_p] in ftp_data_expected )
		return cat(Analyzer::ANALYZER_FTP_DATA, c$start_time, c$id, is_orig);

	return "";
	}
# Describes a file transferred over an FTP data channel.
#
# f: the file to describe.
#
# Returns: the FTP::describe() text of the first connection of the file that
#          carries FTP state, or an empty string if there is none.
function describe_file(f: fa_file): string
	{
	# This shouldn't be needed, but just in case...
	# The callback is registered for ANALYZER_FTP_DATA and Files::describe
	# dispatches on the tag derived from f$source, so the source to expect
	# here is "FTP_DATA". Comparing against "FTP" made this always bail out.
	if ( f$source != "FTP_DATA" )
		return "";

	# conns is an optional field of fa_file.
	if ( ! f?$conns )
		return "";

	for ( cid in f$conns )
		{
		if ( f$conns[cid]?$ftp )
			return FTP::describe(f$conns[cid]$ftp);
		}

	return "";
	}
# Hooks FTP into the Files framework: file handles and descriptions for the
# FTP_DATA analyzer come from the two functions defined in this script.
event bro_init() &priority=5
{
Files::register_protocol(Analyzer::ANALYZER_FTP_DATA,
[$get_file_handle = FTP::get_file_handle,
$describe = FTP::describe_file]);
}
# When a file shows up on an expected FTP data channel, copies its id (and
# MIME type, if known) into the FTP session record stored for that channel.
event file_over_new_connection(f: fa_file, c: connection, is_orig: bool) &priority=5
	{
	local host = c$id$resp_h;
	local svc = c$id$resp_p;

	if ( [host, svc] in ftp_data_expected )
		{
		local session = ftp_data_expected[host, svc];
		session$fuid = f$id;

		if ( f?$mime_type )
			session$mime_type = f$mime_type;
		}
	}

View file

@ -63,8 +63,6 @@ export {
reply_code: count &log &optional; reply_code: count &log &optional;
## Reply message from the server in response to the command. ## Reply message from the server in response to the command.
reply_msg: string &log &optional; reply_msg: string &log &optional;
## Arbitrary tags that may indicate a particular attribute of this command.
tags: set[string] &log;
## Expected FTP data channel. ## Expected FTP data channel.
data_channel: ExpectedDataChannel &log &optional; data_channel: ExpectedDataChannel &log &optional;
@ -104,6 +102,8 @@ export {
global log_ftp: event(rec: Info); global log_ftp: event(rec: Info);
} }
@load ./utils
# Add the state tracking information variable to the connection record # Add the state tracking information variable to the connection record
redef record connection += { redef record connection += {
ftp: Info &optional; ftp: Info &optional;
@ -171,37 +171,26 @@ function set_ftp_session(c: connection)
function ftp_message(s: Info) function ftp_message(s: Info)
{ {
# If it either has a tag associated with it (something detected) s$ts=s$cmdarg$ts;
# or it's a deliberately logged command. s$command=s$cmdarg$cmd;
if ( |s$tags| > 0 || (s?$cmdarg && s$cmdarg$cmd in logged_commands) )
s$arg = s$cmdarg$arg;
if ( s$cmdarg$cmd in file_cmds )
s$arg = build_url_ftp(s);
if ( s$arg == "" )
delete s$arg;
if ( s?$password &&
! s$capture_password &&
to_lower(s$user) !in guest_ids )
{ {
if ( s?$password && s$password = "<hidden>";
! s$capture_password &&
to_lower(s$user) !in guest_ids )
{
s$password = "<hidden>";
}
local arg = s$cmdarg$arg;
if ( s$cmdarg$cmd in file_cmds )
{
local comp_path = build_path_compressed(s$cwd, arg);
if ( comp_path[0] != "/" )
comp_path = cat("/", comp_path);
arg = fmt("ftp://%s%s", addr_to_uri(s$id$resp_h), comp_path);
}
s$ts=s$cmdarg$ts;
s$command=s$cmdarg$cmd;
if ( arg == "" )
delete s$arg;
else
s$arg=arg;
Log::write(FTP::LOG, s);
} }
if ( s?$cmdarg && s$command in logged_commands)
Log::write(FTP::LOG, s);
# The MIME and file_size fields are specific to file transfer commands # The MIME and file_size fields are specific to file transfer commands
# and may not be used in all commands so they need reset to "blank" # and may not be used in all commands so they need reset to "blank"
# values after logging. # values after logging.
@ -209,8 +198,6 @@ function ftp_message(s: Info)
delete s$file_size; delete s$file_size;
# Same with data channel. # Same with data channel.
delete s$data_channel; delete s$data_channel;
# Tags are cleared everytime too.
s$tags = set();
} }
function add_expected_data_channel(s: Info, chan: ExpectedDataChannel) function add_expected_data_channel(s: Info, chan: ExpectedDataChannel)
@ -218,8 +205,9 @@ function add_expected_data_channel(s: Info, chan: ExpectedDataChannel)
s$passive = chan$passive; s$passive = chan$passive;
s$data_channel = chan; s$data_channel = chan;
ftp_data_expected[chan$resp_h, chan$resp_p] = s; ftp_data_expected[chan$resp_h, chan$resp_p] = s;
Analyzer::schedule_analyzer(chan$orig_h, chan$resp_h, chan$resp_p, Analyzer::ANALYZER_FTP_DATA, Analyzer::schedule_analyzer(chan$orig_h, chan$resp_h, chan$resp_p,
5mins); Analyzer::ANALYZER_FTP_DATA,
5mins);
} }
event ftp_request(c: connection, command: string, arg: string) &priority=5 event ftp_request(c: connection, command: string, arg: string) &priority=5

View file

@ -0,0 +1,47 @@
##! Utilities specific for FTP processing.
@load ./main
@load base/utils/addrs
module FTP;
export {
## Creates a URL from an :bro:type:`FTP::Info` record.
##
## rec: An :bro:type:`FTP::Info` record.
##
## Returns: A URL, not prefixed by "ftp://", or an empty string if
##          *rec* has no *arg* field.
global build_url: function(rec: Info): string;
## Creates a URL from an :bro:type:`FTP::Info` record.
##
## rec: An :bro:type:`FTP::Info` record.
##
## Returns: A URL prefixed with "ftp://".
global build_url_ftp: function(rec: Info): string;
## Create an extremely shortened representation of a log line.
##
## rec: An :bro:type:`FTP::Info` record.
##
## Returns: The same string as :bro:see:`FTP::build_url_ftp`.
global describe: function(rec: Info): string;
}
# Builds "<server><path>" for the command argument of *rec*, where the path
# is the argument resolved against the current working directory and always
# starts with "/". Yields "" when the record has no argument.
function build_url(rec: Info): string
	{
	if ( rec?$arg )
		{
		local path = build_path_compressed(rec$cwd, rec$arg);

		# Make the path absolute so that it can follow the host directly.
		if ( path[0] != "/" )
			path = cat("/", path);

		local host = addr_to_uri(rec$id$resp_h);
		return fmt("%s%s", host, path);
		}

	return "";
	}
# Same as build_url(), with the "ftp://" scheme put in front.
function build_url_ftp(rec: Info): string
	{
	local url = build_url(rec);
	return fmt("ftp://%s", url);
	}
# The short description of an FTP log record is simply its ftp:// URL.
function describe(rec: Info): string
{
return build_url_ftp(rec);
}

View file

@ -1,8 +1,6 @@
@load ./main @load ./main
@load ./entities
@load ./utils @load ./utils
@load ./file-analysis @load ./files
@load ./file-ident
@load ./file-hash
@load ./file-extract
@load-sigs ./dpd.sig @load-sigs ./dpd.sig

View file

@ -0,0 +1,109 @@
##! Analysis and logging for MIME entities found in HTTP sessions.
@load base/frameworks/files
@load base/utils/strings
@load base/utils/files
@load ./main
module HTTP;
export {
## State kept for the MIME entity that is currently being parsed.
type Entity: record {
## Filename for the entity if discovered from a header.
filename: string &optional;
};
redef record Info += {
## An ordered vector of file unique IDs for files sent by the
## originator.
orig_fuids: vector of string &log &optional;
## An ordered vector of mime types for files sent by the
## originator.
orig_mime_types: vector of string &log &optional;
## An ordered vector of file unique IDs for files sent by the
## responder.
resp_fuids: vector of string &log &optional;
## An ordered vector of mime types for files sent by the
## responder.
resp_mime_types: vector of string &log &optional;
## The current entity.
current_entity: Entity &optional;
## Current number of MIME entities in the HTTP request message body.
orig_mime_depth: count &default=0;
## Current number of MIME entities in the HTTP response message body.
resp_mime_depth: count &default=0;
};
}
# A new MIME entity starts: bump the entity counter for the sending side and
# start with a fresh Entity record.
event http_begin_entity(c: connection, is_orig: bool) &priority=10
	{
	set_state(c, F, is_orig);

	if ( ! is_orig )
		++c$http$resp_mime_depth;
	else
		++c$http$orig_mime_depth;

	local fresh: Entity = Entity();
	c$http$current_entity = fresh;
	}
# Picks up a filename for the current entity from a Content-Disposition
# header carrying "filename", or from a Content-Type header carrying a
# "name=" parameter.
event http_header(c: connection, is_orig: bool, name: string, value: string) &priority=3
	{
	# Without a current entity there is nothing to attach a filename to;
	# http_end_entity removes the record, so check before dereferencing.
	if ( ! c?$http || ! c$http?$current_entity )
		return;

	if ( name == "CONTENT-DISPOSITION" &&
	     /[fF][iI][lL][eE][nN][aA][mM][eE]/ in value )
		{
		c$http$current_entity$filename = extract_filename_from_content_disposition(value);
		}
	else if ( name == "CONTENT-TYPE" &&
	          # [:blank:] is only a character class expression inside a
	          # bracket expression; written bare it is just the set of
	          # the characters ':', 'b', 'l', 'a', 'n' and 'k'.
	          /[nN][aA][mM][eE][[:blank:]]*=/ in value )
		{
		c$http$current_entity$filename = extract_filename_from_content_disposition(value);
		}
	}
# Ties a file seen over HTTP to the HTTP transaction: copies the entity's
# filename into the file's log record and appends the file id (and MIME
# type, when known) to the per-direction vectors of the HTTP record.
event file_over_new_connection(f: fa_file, c: connection, is_orig: bool) &priority=5
	{
	if ( f$source != "HTTP" || ! c?$http )
		return;

	if ( c$http?$current_entity && c$http$current_entity?$filename )
		f$info$filename = c$http$current_entity$filename;

	if ( f$is_orig )
		{
		# Each vector is created based on its own presence. Keying the
		# fuid vector on the mime type vector would replace it (and drop
		# earlier ids) whenever a file has no mime type.
		if ( ! c$http?$orig_fuids )
			c$http$orig_fuids = string_vec(f$id);
		else
			c$http$orig_fuids[|c$http$orig_fuids|] = f$id;

		if ( f?$mime_type )
			{
			if ( ! c$http?$orig_mime_types )
				c$http$orig_mime_types = string_vec(f$mime_type);
			else
				c$http$orig_mime_types[|c$http$orig_mime_types|] = f$mime_type;
			}
		}
	else
		{
		if ( ! c$http?$resp_fuids )
			c$http$resp_fuids = string_vec(f$id);
		else
			c$http$resp_fuids[|c$http$resp_fuids|] = f$id;

		if ( f?$mime_type )
			{
			if ( ! c$http?$resp_mime_types )
				c$http$resp_mime_types = string_vec(f$mime_type);
			else
				c$http$resp_mime_types[|c$http$resp_mime_types|] = f$mime_type;
			}
		}
	}
# The entity is finished: drop the per-entity state, if there is any.
event http_end_entity(c: connection, is_orig: bool) &priority=5
	{
	if ( ! c?$http )
		return;

	if ( ! c$http?$current_entity )
		return;

	delete c$http$current_entity;
	}

View file

@ -1,54 +0,0 @@
@load ./main
@load ./utils
@load base/utils/conn-ids
@load base/frameworks/file-analysis/main
module HTTP;
export {
redef record HTTP::Info += {
## Number of MIME entities in the HTTP request message body so far.
request_mime_level: count &default=0;
## Number of MIME entities in the HTTP response message body so far.
response_mime_level: count &default=0;
};
## Default file handle provider for HTTP.
global get_file_handle: function(c: connection, is_orig: bool): string;
}
event http_begin_entity(c: connection, is_orig: bool) &priority=5
{
if ( ! c?$http )
return;
if ( is_orig )
++c$http$request_mime_level;
else
++c$http$response_mime_level;
}
function get_file_handle(c: connection, is_orig: bool): string
{
if ( ! c?$http ) return "";
local mime_level: count =
is_orig ? c$http$request_mime_level : c$http$response_mime_level;
local mime_level_str: string = mime_level > 1 ? cat(mime_level) : "";
if ( c$http$range_request )
return cat(Analyzer::ANALYZER_HTTP, " ", is_orig, " ", c$id$orig_h, " ",
build_url(c$http));
return cat(Analyzer::ANALYZER_HTTP, " ", c$start_time, " ", is_orig, " ",
c$http$trans_depth, mime_level_str, " ", id_string(c$id));
}
module GLOBAL;
event get_file_handle(tag: Analyzer::Tag, c: connection, is_orig: bool)
&priority=5
{
if ( tag != Analyzer::ANALYZER_HTTP ) return;
set_file_handle(HTTP::get_file_handle(c, is_orig));
}

View file

@ -1,100 +0,0 @@
##! Extracts the items from HTTP traffic, one per file. At this time only
##! the message body from the server can be extracted with this script.
@load ./main
@load ./file-analysis
module HTTP;
export {
## Pattern of file mime types to extract from HTTP response entity bodies.
const extract_file_types = /NO_DEFAULT/ &redef;
## The on-disk prefix for files to be extracted from HTTP entity bodies.
const extraction_prefix = "http-item" &redef;
redef record Info += {
## On-disk location where files in request body were extracted.
extracted_request_files: vector of string &log &optional;
## On-disk location where files in response body were extracted.
extracted_response_files: vector of string &log &optional;
## Indicates if the response body is to be extracted or not. Must be
## set before or by the first :bro:see:`file_new` for the file content.
extract_file: bool &default=F;
};
}
function get_extraction_name(f: fa_file): string
{
local r = fmt("%s-%s.dat", extraction_prefix, f$id);
return r;
}
function add_extraction_file(c: connection, is_orig: bool, fn: string)
{
if ( is_orig )
{
if ( ! c$http?$extracted_request_files )
c$http$extracted_request_files = vector();
c$http$extracted_request_files[|c$http$extracted_request_files|] = fn;
}
else
{
if ( ! c$http?$extracted_response_files )
c$http$extracted_response_files = vector();
c$http$extracted_response_files[|c$http$extracted_response_files|] = fn;
}
}
event file_new(f: fa_file) &priority=5
{
if ( ! f?$source ) return;
if ( f$source != "HTTP" ) return;
if ( ! f?$conns ) return;
local fname: string;
local c: connection;
if ( f?$mime_type && extract_file_types in f$mime_type )
{
fname = get_extraction_name(f);
FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_EXTRACT,
$extract_filename=fname]);
for ( cid in f$conns )
{
c = f$conns[cid];
if ( ! c?$http ) next;
add_extraction_file(c, f$is_orig, fname);
}
return;
}
local extracting: bool = F;
for ( cid in f$conns )
{
c = f$conns[cid];
if ( ! c?$http ) next;
if ( ! c$http$extract_file ) next;
fname = get_extraction_name(f);
FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_EXTRACT,
$extract_filename=fname]);
extracting = T;
break;
}
if ( extracting )
for ( cid in f$conns )
{
c = f$conns[cid];
if ( ! c?$http ) next;
add_extraction_file(c, f$is_orig, fname);
}
}

View file

@ -1,68 +0,0 @@
##! Calculate hashes for HTTP body transfers.
@load ./main
@load ./file-analysis
module HTTP;
export {
redef record Info += {
## MD5 sum for a file transferred over HTTP calculated from the
## response body.
md5: string &log &optional;
## This value can be set per-transfer to determine per request
## if a file should have an MD5 sum generated. It must be
## set to T at the time of or before the first chunk of body data.
calc_md5: bool &default=F;
};
## Generate MD5 sums for these filetypes.
const generate_md5 = /application\/x-dosexec/ # Windows and DOS executables
| /application\/x-executable/ # *NIX executable binary
&redef;
}
event file_new(f: fa_file) &priority=5
{
if ( ! f?$source ) return;
if ( f$source != "HTTP" ) return;
if ( f?$mime_type && generate_md5 in f$mime_type )
{
FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_MD5]);
return;
}
if ( ! f?$conns ) return;
for ( cid in f$conns )
{
local c: connection = f$conns[cid];
if ( ! c?$http ) next;
if ( ! c$http$calc_md5 ) next;
FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_MD5]);
return;
}
}
event file_state_remove(f: fa_file) &priority=4
{
if ( ! f?$source ) return;
if ( f$source != "HTTP" ) return;
if ( ! f?$conns ) return;
if ( ! f?$info ) return;
if ( ! f$info?$md5 ) return;
for ( cid in f$conns )
{
local c: connection = f$conns[cid];
if ( ! c?$http ) next;
c$http$md5 = f$info$md5;
}
}

View file

@ -1,105 +0,0 @@
##! Identification of file types in HTTP response bodies with file content sniffing.
@load base/frameworks/notice
@load ./main
@load ./utils
@load ./file-analysis
module HTTP;
export {
redef enum Notice::Type += {
## Indicates when the file extension doesn't seem to match the file
## contents.
Incorrect_File_Type,
};
redef record Info += {
## Mime type of response body identified by content sniffing.
mime_type: string &log &optional;
};
## Mapping between mime type strings (without character set) and
## regular expressions for URLs.
## The :bro:enum:`HTTP::Incorrect_File_Type` notice is generated if the
## pattern doesn't match the mime type that was discovered.
const mime_types_extensions: table[string] of pattern = {
["application/x-dosexec"] = /\.([eE][xX][eE]|[dD][lL][lL])/,
} &redef;
## A pattern for filtering out :bro:enum:`HTTP::Incorrect_File_Type` urls
## that are not noteworthy before a notice is created. Each
## pattern added should match the complete URL (the matched URLs include
## "http://" at the beginning).
const ignored_incorrect_file_type_urls = /^$/ &redef;
}
event file_new(f: fa_file) &priority=5
{
if ( ! f?$source ) return;
if ( f$source != "HTTP" ) return;
if ( ! f?$mime_type ) return;
if ( ! f?$conns ) return;
for ( cid in f$conns )
{
local c: connection = f$conns[cid];
if ( ! c?$http ) next;
c$http$mime_type = f$mime_type;
local mime_str: string = c$http$mime_type;
if ( mime_str !in mime_types_extensions ) next;
if ( ! c$http?$uri ) next;
if ( mime_types_extensions[mime_str] in c$http$uri ) next;
local url = build_url_http(c$http);
if ( url == ignored_incorrect_file_type_urls ) next;
local message = fmt("%s %s %s", mime_str, c$http$method, url);
NOTICE([$note=Incorrect_File_Type,
$msg=message,
$conn=c]);
}
}
event file_over_new_connection(f: fa_file, c: connection) &priority=5
{
if ( ! f?$source ) return;
if ( f$source != "HTTP" ) return;
if ( ! f?$mime_type ) return;
if ( ! c?$http ) return;
# Spread the mime around (e.g. for partial content, file_type event only
# happens once for the first connection, but if there's subsequent
# connections to transfer the same file, they'll be lacking the mime_type
# field if we don't do this).
c$http$mime_type = f$mime_type;
}
# Tracks byte-range request / partial content response mime types, indexed
# by [connection, uri] pairs. This is needed because a person can pipeline
# byte-range requests over multiple connections to the same uri. Without
# the tracking, only the first request in the pipeline for each connection
# would get a mime_type field assigned to it (by the FileAnalysis policy hooks).
global partial_types: table[conn_id, string] of string &read_expire=5mins;
# Priority 4 so that it runs before the handler that will write to http.log.
event http_message_done(c: connection, is_orig: bool, stat: http_message_stat)
&priority=4
{
if ( ! c$http$range_request ) return;
if ( ! c$http?$uri ) return;
if ( c$http?$mime_type )
{
partial_types[c$id, c$http$uri] = c$http$mime_type;
return;
}
if ( [c$id, c$http$uri] in partial_types )
c$http$mime_type = partial_types[c$id, c$http$uri];
}

View file

@ -0,0 +1,56 @@
@load ./main
@load ./entities
@load ./utils
@load base/utils/conn-ids
@load base/frameworks/files
module HTTP;
export {
## Default file handle provider for HTTP.
##
## Returns: a file handle string, or an empty string if the
##          connection has no HTTP state.
global get_file_handle: function(c: connection, is_orig: bool): string;
## Default file describer for HTTP.
##
## Returns: the URL built from the first connection of the file that
##          has HTTP state, or an empty string.
global describe_file: function(f: fa_file): string;
}
# Builds the file handle for the HTTP entity currently being transferred in
# the given direction of *c*.
function get_file_handle(c: connection, is_orig: bool): string
	{
	if ( ! c?$http )
		return "";

	# Multipart responses to a range request are all pieces of one file, so
	# the handle for them is built from the requesting host and the URL
	# rather than from connection and MIME depth.
	if ( c$http$range_request && ! is_orig )
		return cat(Analyzer::ANALYZER_HTTP, is_orig, c$id$orig_h, build_url(c$http));

	# Everything else is identified per connection, transaction and entity.
	local mime_depth: count;

	if ( is_orig )
		mime_depth = c$http$orig_mime_depth;
	else
		mime_depth = c$http$resp_mime_depth;

	return cat(Analyzer::ANALYZER_HTTP, c$start_time, is_orig,
	           c$http$trans_depth, mime_depth, id_string(c$id));
	}
# Produces a short description of a file transferred over HTTP: the full
# URL of the first associated connection that carries HTTP state.
#
# f: The file to describe.
#
# Returns: An "http://"-prefixed URL, or "" if the file is not from HTTP,
#          has no connections attached, or none of them has HTTP state.
function describe_file(f: fa_file): string
	{
	# This shouldn't be needed, but just in case...
	if ( f$source != "HTTP" )
		return "";

	# Other handlers treat $conns as an optional field and test it with
	# f?$conns before iterating; do the same here so an unset field
	# yields an empty description instead of a runtime error.
	if ( ! f?$conns )
		return "";

	for ( cid in f$conns )
		{
		if ( f$conns[cid]?$http )
			return build_url_http(f$conns[cid]$http);
		}

	return "";
	}
# Hook the HTTP handle provider and describer into the files framework
# at startup.
event bro_init() &priority=5
	{
	Files::register_protocol(
		Analyzer::ANALYZER_HTTP,
		[$get_file_handle = HTTP::get_file_handle,
		 $describe        = HTTP::describe_file]);
	}

View file

@ -1,5 +1,5 @@
##! Implements base functionality for HTTP analysis. The logging model is ##! Implements base functionality for HTTP analysis. The logging model is
##! to log request/response pairs and all relevant metadata together in ##! to log request/response pairs and all relevant metadata together in
##! a single record. ##! a single record.
@load base/utils/numbers @load base/utils/numbers
@ -15,10 +15,10 @@ export {
## Placeholder. ## Placeholder.
EMPTY EMPTY
}; };
## This setting changes if passwords used in Basic-Auth are captured or not. ## This setting changes if passwords used in Basic-Auth are captured or not.
const default_capture_password = F &redef; const default_capture_password = F &redef;
type Info: record { type Info: record {
## Timestamp for when the request happened. ## Timestamp for when the request happened.
ts: time &log; ts: time &log;
@ -26,7 +26,7 @@ export {
uid: string &log; uid: string &log;
## The connection's 4-tuple of endpoint addresses/ports. ## The connection's 4-tuple of endpoint addresses/ports.
id: conn_id &log; id: conn_id &log;
## Represents the pipelined depth into the connection of this ## Represents the pipelined depth into the connection of this
## request/response transaction. ## request/response transaction.
trans_depth: count &log; trans_depth: count &log;
## Verb used in the HTTP request (GET, POST, HEAD, etc.). ## Verb used in the HTTP request (GET, POST, HEAD, etc.).
@ -60,24 +60,24 @@ export {
## A set of indicators of various attributes discovered and ## A set of indicators of various attributes discovered and
## related to a particular request/response pair. ## related to a particular request/response pair.
tags: set[Tags] &log; tags: set[Tags] &log;
## Username if basic-auth is performed for the request. ## Username if basic-auth is performed for the request.
username: string &log &optional; username: string &log &optional;
## Password if basic-auth is performed for the request. ## Password if basic-auth is performed for the request.
password: string &log &optional; password: string &log &optional;
## Determines if the password will be captured for this request. ## Determines if the password will be captured for this request.
capture_password: bool &default=default_capture_password; capture_password: bool &default=default_capture_password;
## All of the headers that may indicate if the request was proxied. ## All of the headers that may indicate if the request was proxied.
proxied: set[string] &log &optional; proxied: set[string] &log &optional;
## Indicates if this request can assume 206 partial content in ## Indicates if this request can assume 206 partial content in
## response. ## response.
range_request: bool &default=F; range_request: bool &default=F;
}; };
## Structure to maintain state for an HTTP connection with multiple ## Structure to maintain state for an HTTP connection with multiple
## requests and responses. ## requests and responses.
type State: record { type State: record {
## Pending requests. ## Pending requests.
@ -87,7 +87,7 @@ export {
## Current response in the pending queue. ## Current response in the pending queue.
current_response: count &default=0; current_response: count &default=0;
}; };
## A list of HTTP headers typically used to indicate proxied requests. ## A list of HTTP headers typically used to indicate proxied requests.
const proxy_headers: set[string] = { const proxy_headers: set[string] = {
"FORWARDED", "FORWARDED",
@ -100,8 +100,8 @@ export {
} &redef; } &redef;
## A list of HTTP methods. Other methods will generate a weird. Note ## A list of HTTP methods. Other methods will generate a weird. Note
## that the HTTP analyzer will only accept methods consisting solely ## that the HTTP analyzer will only accept methods consisting solely
## of letters ``[A-Za-z]``. ## of letters ``[A-Za-z]``.
const http_methods: set[string] = { const http_methods: set[string] = {
"GET", "POST", "HEAD", "OPTIONS", "GET", "POST", "HEAD", "OPTIONS",
"PUT", "DELETE", "TRACE", "CONNECT", "PUT", "DELETE", "TRACE", "CONNECT",
@ -111,8 +111,8 @@ export {
"POLL", "REPORT", "SUBSCRIBE", "BMOVE", "POLL", "REPORT", "SUBSCRIBE", "BMOVE",
"SEARCH" "SEARCH"
} &redef; } &redef;
## Event that can be handled to access the HTTP record as it is sent on ## Event that can be handled to access the HTTP record as it is sent on
## to the logging framework. ## to the logging framework.
global log_http: event(rec: Info); global log_http: event(rec: Info);
} }
@ -147,12 +147,12 @@ function new_http_session(c: connection): Info
tmp$ts=network_time(); tmp$ts=network_time();
tmp$uid=c$uid; tmp$uid=c$uid;
tmp$id=c$id; tmp$id=c$id;
# $current_request is set prior to the Info record creation so we # $current_request is set prior to the Info record creation so we
# can use the value directly here. # can use the value directly here.
tmp$trans_depth = c$http_state$current_request; tmp$trans_depth = c$http_state$current_request;
return tmp; return tmp;
} }
function set_state(c: connection, request: bool, is_orig: bool) function set_state(c: connection, request: bool, is_orig: bool)
{ {
if ( ! c?$http_state ) if ( ! c?$http_state )
@ -160,19 +160,19 @@ function set_state(c: connection, request: bool, is_orig: bool)
local s: State; local s: State;
c$http_state = s; c$http_state = s;
} }
# These deal with new requests and responses. # These deal with new requests and responses.
if ( request || c$http_state$current_request !in c$http_state$pending ) if ( request || c$http_state$current_request !in c$http_state$pending )
c$http_state$pending[c$http_state$current_request] = new_http_session(c); c$http_state$pending[c$http_state$current_request] = new_http_session(c);
if ( ! is_orig && c$http_state$current_response !in c$http_state$pending ) if ( ! is_orig && c$http_state$current_response !in c$http_state$pending )
c$http_state$pending[c$http_state$current_response] = new_http_session(c); c$http_state$pending[c$http_state$current_response] = new_http_session(c);
if ( is_orig ) if ( is_orig )
c$http = c$http_state$pending[c$http_state$current_request]; c$http = c$http_state$pending[c$http_state$current_request];
else else
c$http = c$http_state$pending[c$http_state$current_response]; c$http = c$http_state$pending[c$http_state$current_response];
} }
event http_request(c: connection, method: string, original_URI: string, event http_request(c: connection, method: string, original_URI: string,
unescaped_URI: string, version: string) &priority=5 unescaped_URI: string, version: string) &priority=5
{ {
@ -181,17 +181,17 @@ event http_request(c: connection, method: string, original_URI: string,
local s: State; local s: State;
c$http_state = s; c$http_state = s;
} }
++c$http_state$current_request; ++c$http_state$current_request;
set_state(c, T, T); set_state(c, T, T);
c$http$method = method; c$http$method = method;
c$http$uri = unescaped_URI; c$http$uri = unescaped_URI;
if ( method !in http_methods ) if ( method !in http_methods )
event conn_weird("unknown_HTTP_method", c, method); event conn_weird("unknown_HTTP_method", c, method);
} }
event http_reply(c: connection, version: string, code: count, reason: string) &priority=5 event http_reply(c: connection, version: string, code: count, reason: string) &priority=5
{ {
if ( ! c?$http_state ) if ( ! c?$http_state )
@ -199,7 +199,7 @@ event http_reply(c: connection, version: string, code: count, reason: string) &p
local s: State; local s: State;
c$http_state = s; c$http_state = s;
} }
# If the last response was an informational 1xx, we're still expecting # If the last response was an informational 1xx, we're still expecting
# the real response to the request, so don't create a new Info record yet. # the real response to the request, so don't create a new Info record yet.
if ( c$http_state$current_response !in c$http_state$pending || if ( c$http_state$current_response !in c$http_state$pending ||
@ -207,7 +207,7 @@ event http_reply(c: connection, version: string, code: count, reason: string) &p
! code_in_range(c$http_state$pending[c$http_state$current_response]$status_code, 100, 199)) ) ! code_in_range(c$http_state$pending[c$http_state$current_response]$status_code, 100, 199)) )
++c$http_state$current_response; ++c$http_state$current_response;
set_state(c, F, F); set_state(c, F, F);
c$http$status_code = code; c$http$status_code = code;
c$http$status_msg = reason; c$http$status_msg = reason;
if ( code_in_range(code, 100, 199) ) if ( code_in_range(code, 100, 199) )
@ -216,33 +216,33 @@ event http_reply(c: connection, version: string, code: count, reason: string) &p
c$http$info_msg = reason; c$http$info_msg = reason;
} }
} }
event http_header(c: connection, is_orig: bool, name: string, value: string) &priority=5 event http_header(c: connection, is_orig: bool, name: string, value: string) &priority=5
{ {
set_state(c, F, is_orig); set_state(c, F, is_orig);
if ( is_orig ) # client headers if ( is_orig ) # client headers
{ {
if ( name == "REFERER" ) if ( name == "REFERER" )
c$http$referrer = value; c$http$referrer = value;
else if ( name == "HOST" ) else if ( name == "HOST" )
# The split is done to remove the occasional port value that shows up here. # The split is done to remove the occasional port value that shows up here.
c$http$host = split1(value, /:/)[1]; c$http$host = split1(value, /:/)[1];
else if ( name == "RANGE" ) else if ( name == "RANGE" )
c$http$range_request = T; c$http$range_request = T;
else if ( name == "USER-AGENT" ) else if ( name == "USER-AGENT" )
c$http$user_agent = value; c$http$user_agent = value;
else if ( name in proxy_headers ) else if ( name in proxy_headers )
{ {
if ( ! c$http?$proxied ) if ( ! c$http?$proxied )
c$http$proxied = set(); c$http$proxied = set();
add c$http$proxied[fmt("%s -> %s", name, value)]; add c$http$proxied[fmt("%s -> %s", name, value)];
} }
else if ( name == "AUTHORIZATION" ) else if ( name == "AUTHORIZATION" )
{ {
if ( /^[bB][aA][sS][iI][cC] / in value ) if ( /^[bB][aA][sS][iI][cC] / in value )
@ -264,25 +264,19 @@ event http_header(c: connection, is_orig: bool, name: string, value: string) &pr
} }
} }
} }
else # server headers
{
if ( name == "CONTENT-DISPOSITION" &&
/[fF][iI][lL][eE][nN][aA][mM][eE]/ in value )
c$http$filename = extract_filename_from_content_disposition(value);
}
} }
event http_message_done(c: connection, is_orig: bool, stat: http_message_stat) &priority = 5 event http_message_done(c: connection, is_orig: bool, stat: http_message_stat) &priority = 5
{ {
set_state(c, F, is_orig); set_state(c, F, is_orig);
if ( is_orig ) if ( is_orig )
c$http$request_body_len = stat$body_length; c$http$request_body_len = stat$body_length;
else else
c$http$response_body_len = stat$body_length; c$http$response_body_len = stat$body_length;
} }
event http_message_done(c: connection, is_orig: bool, stat: http_message_stat) &priority = -5 event http_message_done(c: connection, is_orig: bool, stat: http_message_stat) &priority = -5
{ {
# The reply body is done so we're ready to log. # The reply body is done so we're ready to log.
@ -311,4 +305,4 @@ event connection_state_remove(c: connection) &priority=-5
} }
} }
} }

View file

@ -32,6 +32,9 @@ export {
## ##
## Returns: A URL prefixed with "http://". ## Returns: A URL prefixed with "http://".
global build_url_http: function(rec: Info): string; global build_url_http: function(rec: Info): string;
## Create an extremely shortened representation of a log line.
global describe: function(rec: Info): string;
} }
@ -62,3 +65,8 @@ function build_url_http(rec: Info): string
{ {
return fmt("http://%s", build_url(rec)); return fmt("http://%s", build_url(rec));
} }
# The compact representation of an HTTP log record is its full URL.
function describe(rec: Info): string
	{
	local url = build_url_http(rec);
	return url;
	}

View file

@ -1,5 +1,5 @@
@load ./main @load ./main
@load ./dcc-send @load ./dcc-send
@load ./file-analysis @load ./files
@load-sigs ./dpd.sig @load-sigs ./dpd.sig

View file

@ -2,7 +2,7 @@
##! ##!
##! There is a major problem with this script in the cluster context because ##! There is a major problem with this script in the cluster context because
##! we might see A send B a message that a DCC connection is to be expected, ##! we might see A send B a message that a DCC connection is to be expected,
##! but that connection will actually be between B and C which could be ##! but that connection will actually be between B and C which could be
##! analyzed on a different worker. ##! analyzed on a different worker.
##! ##!
@ -15,12 +15,6 @@
module IRC; module IRC;
export { export {
## Pattern of file mime types to extract from IRC DCC file transfers.
const extract_file_types = /NO_DEFAULT/ &redef;
## On-disk prefix for files to be extracted from IRC DCC file transfers.
const extraction_prefix = "irc-dcc-item" &redef;
redef record Info += { redef record Info += {
## DCC filename requested. ## DCC filename requested.
dcc_file_name: string &log &optional; dcc_file_name: string &log &optional;
@ -28,101 +22,10 @@ export {
dcc_file_size: count &log &optional; dcc_file_size: count &log &optional;
## Sniffed mime type of the file. ## Sniffed mime type of the file.
dcc_mime_type: string &log &optional; dcc_mime_type: string &log &optional;
## The file handle for the file to be extracted
extraction_file: string &log &optional;
## A boolean to indicate if the current file transfer should be extracted.
extract_file: bool &default=F;
}; };
} }
global dcc_expected_transfers: table[addr, port] of Info &read_expire=5mins; global dcc_expected_transfers: table[addr, port] of Info &synchronized &read_expire=5mins;
# Copies the file's mime type into the IRC::Info record of every expected
# DCC transfer whose responder address/port matches one of the file's
# connections. Does nothing when the file has no connections attached.
#
# f: The file whose $mime_type is propagated; the caller must ensure the
#    field is set.
function set_dcc_mime(f: fa_file)
	{
	if ( ! f?$conns )
		return;

	for ( cid in f$conns )
		{
		if ( [cid$resp_h, cid$resp_p] !in dcc_expected_transfers )
			next;

		# The previously declared connection local was never used and
		# has been dropped; only the conn_id is needed for the lookup.
		local s = dcc_expected_transfers[cid$resp_h, cid$resp_p];
		s$dcc_mime_type = f$mime_type;
		}
	}
# Records the extraction filename in the IRC::Info record of every expected
# DCC transfer whose responder address/port matches one of the file's
# connections. Does nothing when the file has no connections attached.
#
# f: The file being extracted.
#
# filename: The on-disk name the file is extracted to.
function set_dcc_extraction_file(f: fa_file, filename: string)
	{
	if ( ! f?$conns )
		return;

	for ( cid in f$conns )
		{
		if ( [cid$resp_h, cid$resp_p] !in dcc_expected_transfers )
			next;

		# The previously declared connection local was never used and
		# has been dropped; only the conn_id is needed for the lookup.
		local s = dcc_expected_transfers[cid$resp_h, cid$resp_p];
		s$extraction_file = filename;
		}
	}
# Derives the on-disk extraction filename for a DCC file from the
# configured prefix and the file's id.
function get_extraction_name(f: fa_file): string
	{
	return fmt("%s-%s.dat", extraction_prefix, f$id);
	}
# Stores the sniffed mime type of IRC DCC files in the IRC::Info record.
event file_new(f: fa_file) &priority=5
	{
	# Ignore files without a source, from other sources, or without a
	# mime type.
	if ( ! f?$source || f$source != "IRC_DATA" || ! f?$mime_type )
		return;

	set_dcc_mime(f);
	}
# Decides whether an IRC DCC file should be extracted to disk. Extraction
# is triggered either by the file's mime type matching extract_file_types,
# or by an expected transfer that has its extract_file flag set.
event file_new(f: fa_file) &priority=5
	{
	if ( ! f?$source )
		return;

	if ( f$source != "IRC_DATA" )
		return;

	local fname: string;

	# Case 1: the mime type alone requests extraction.
	if ( f?$mime_type && extract_file_types in f$mime_type )
		{
		fname = get_extraction_name(f);
		FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_EXTRACT,
		                               $extract_filename=fname]);
		set_dcc_extraction_file(f, fname);
		return;
		}

	if ( ! f?$conns )
		return;

	# Case 2: an expected transfer on one of the file's connections asks
	# for extraction. Only the first such transfer is acted on.
	for ( cid in f$conns )
		{
		# The previously declared connection local was never used and
		# has been dropped; only the conn_id is needed for the lookup.
		if ( [cid$resp_h, cid$resp_p] !in dcc_expected_transfers )
			next;

		local s = dcc_expected_transfers[cid$resp_h, cid$resp_p];

		if ( ! s$extract_file )
			next;

		fname = get_extraction_name(f);
		FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_EXTRACT,
		                               $extract_filename=fname]);
		s$extraction_file = fname;
		return;
		}
	}
function log_dcc(f: fa_file) function log_dcc(f: fa_file)
{ {
@ -141,24 +44,21 @@ function log_dcc(f: fa_file)
Log::write(IRC::LOG, irc); Log::write(IRC::LOG, irc);
irc$command = tmp; irc$command = tmp;
# Delete these values in case another DCC transfer # Delete these values in case another DCC transfer
# happens during the IRC session. # happens during the IRC session.
delete irc$extract_file;
delete irc$extraction_file;
delete irc$dcc_file_name; delete irc$dcc_file_name;
delete irc$dcc_file_size; delete irc$dcc_file_size;
delete irc$dcc_mime_type; delete irc$dcc_mime_type;
delete dcc_expected_transfers[cid$resp_h, cid$resp_p];
return; return;
} }
} }
event file_new(f: fa_file) &priority=-5 event file_new(f: fa_file) &priority=-5
{ {
if ( ! f?$source ) return; if ( f$source == "IRC_DATA" )
if ( f$source != "IRC_DATA" ) return; log_dcc(f);
log_dcc(f);
} }
event irc_dcc_message(c: connection, is_orig: bool, event irc_dcc_message(c: connection, is_orig: bool,

View file

@ -1,25 +0,0 @@
@load ./dcc-send.bro
@load base/utils/conn-ids
@load base/frameworks/file-analysis/main
module IRC;
export {
## Default file handle provider for IRC.
##
## c: The connection the file data is being transferred over.
##
## is_orig: Whether the data was sent by the connection originator.
##
## Returns: A string handle for the file, or an empty string for
##          originator-side data.
global get_file_handle: function(c: connection, is_orig: bool): string;
}
# Builds the file handle for IRC DCC data. Originator-side data yields an
# empty handle; responder-side data is keyed on the analyzer tag, the
# connection start time and the connection id string.
function get_file_handle(c: connection, is_orig: bool): string
	{
	return is_orig ? "" :
	       cat(Analyzer::ANALYZER_IRC_DATA, " ", c$start_time, " ", id_string(c$id));
	}
module GLOBAL;
# Answers file handle requests for the IRC data analyzer; requests for
# any other analyzer tag are left untouched.
event get_file_handle(tag: Analyzer::Tag, c: connection, is_orig: bool) &priority=5
	{
	if ( tag == Analyzer::ANALYZER_IRC_DATA )
		set_file_handle(IRC::get_file_handle(c, is_orig));
	}

View file

@ -0,0 +1,39 @@
@load ./dcc-send
@load base/utils/conn-ids
@load base/frameworks/files
module IRC;
export {
redef record Info += {
## File unique ID. Filled in from the file's id when a file is seen
## over a connection matching an expected DCC transfer.
fuid: string &log &optional;
};
## Default file handle provider for IRC.
##
## c: The connection the file data is being transferred over.
##
## is_orig: Whether the data was sent by the connection originator.
##
## Returns: A string handle identifying the file.
global get_file_handle: function(c: connection, is_orig: bool): string;
}
# Builds the file handle for IRC DCC data from the analyzer tag, the
# connection start time, the connection id and the direction.
function get_file_handle(c: connection, is_orig: bool): string
	{
	local handle = cat(Analyzer::ANALYZER_IRC_DATA, c$start_time, c$id, is_orig);
	return handle;
	}
# Hook the IRC handle provider into the files framework at startup.
event bro_init() &priority=5
	{
	Files::register_protocol(
		Analyzer::ANALYZER_IRC_DATA,
		[$get_file_handle = IRC::get_file_handle]);
	}
# Links a file seen over a connection to the expected DCC transfer on the
# same responder address/port, exchanging metadata in both directions.
event file_over_new_connection(f: fa_file, c: connection, is_orig: bool) &priority=5
	{
	local resp_host = c$id$resp_h;
	local resp_port = c$id$resp_p;

	# Not a connection announced by a DCC message: nothing to do.
	if ( [resp_host, resp_port] !in dcc_expected_transfers )
		return;

	local xfer = dcc_expected_transfers[resp_host, resp_port];

	# IRC record learns the file id ...
	xfer$fuid = f$id;

	# ... the file record learns the announced filename ...
	if ( xfer?$dcc_file_name )
		f$info$filename = xfer$dcc_file_name;

	# ... and the IRC record learns the sniffed mime type, if known.
	if ( f?$mime_type )
		xfer$dcc_mime_type = f$mime_type;
	}

View file

@ -1,6 +1,5 @@
@load ./main @load ./main
@load ./entities @load ./entities
@load ./entities-excerpt @load ./files
@load ./file-analysis
@load-sigs ./dpd.sig @load-sigs ./dpd.sig

View file

@ -1,5 +1,6 @@
##! Analysis and logging for MIME entities found in SMTP sessions. ##! Analysis and logging for MIME entities found in SMTP sessions.
@load base/frameworks/files
@load base/utils/strings @load base/utils/strings
@load base/utils/files @load base/utils/files
@load ./main @load ./main
@ -7,217 +8,55 @@
module SMTP; module SMTP;
export { export {
redef enum Log::ID += { ENTITIES_LOG }; type Entity: record {
## Filename for the entity if discovered from a header.
type EntityInfo: record { filename: string &optional;
## This is the timestamp of when the MIME content transfer began.
ts: time &log;
uid: string &log;
id: conn_id &log;
## A count to represent the depth of this message transaction in a
## single connection where multiple messages were transferred.
trans_depth: count &log;
## The filename seen in the Content-Disposition header.
filename: string &log &optional;
## Track how many bytes of the MIME encoded file have been seen.
content_len: count &log &default=0;
## The mime type of the entity discovered through magic bytes identification.
mime_type: string &log &optional;
## The calculated MD5 sum for the MIME entity.
md5: string &log &optional;
## Optionally calculate the file's MD5 sum. Must be set prior to the
## first data chunk being see in an event.
calc_md5: bool &default=F;
## Optionally write the file to disk. Must be set prior to first
## data chunk being seen in an event.
extract_file: bool &default=F;
## Store the file handle here for the file currently being extracted.
extraction_file: string &log &optional;
}; };
redef record Info += { redef record Info += {
## The in-progress entity information. ## The current entity being seen.
current_entity: EntityInfo &optional; entity: Entity &optional;
}; };
redef record State += { redef record State += {
## Track the number of MIME encoded files transferred during a session. ## Track the number of MIME encoded files transferred
mime_level: count &default=0; ## during a session.
mime_depth: count &default=0;
}; };
## Generate MD5 sums for these filetypes.
const generate_md5 = /application\/x-dosexec/ # Windows and DOS executables
| /application\/x-executable/ # *NIX executable binary
&redef;
## Pattern of file mime types to extract from MIME bodies.
const extract_file_types = /NO_DEFAULT/ &redef;
## The on-disk prefix for files to be extracted from MIME entity bodies.
const extraction_prefix = "smtp-entity" &redef;
## If set, never generate MD5s. This is mainly for testing purposes to create
## reproducable output in the case that the decision whether to create
## checksums depends on environment specifics.
const never_calc_md5 = F &redef;
global log_mime: event(rec: EntityInfo);
} }
# Create the log stream for SMTP MIME entities at startup.
event bro_init() &priority=5
	{
	Log::create_stream(
		SMTP::ENTITIES_LOG,
		[$columns=EntityInfo, $ev=log_mime]);
	}
# Ensures the SMTP session has a current entity record. A fresh record is
# created when none exists yet or when new_entity is set, and the mime
# level counter of the connection is bumped each time that happens.
function set_session(c: connection, new_entity: bool)
	{
	# Keep the existing entity unless a new one was requested.
	if ( c$smtp?$current_entity && ! new_entity )
		return;

	local entity: EntityInfo = [$ts=network_time(),
	                            $uid=c$uid,
	                            $id=c$id,
	                            $trans_depth=c$smtp$trans_depth];
	c$smtp$current_entity = entity;
	++c$smtp_state$mime_level;
	}
# Derives the on-disk extraction filename for an SMTP entity from the
# configured prefix and the file's id.
function get_extraction_name(f: fa_file): string
	{
	return fmt("%s-%s.dat", extraction_prefix, f$id);
	}
event mime_begin_entity(c: connection) &priority=10 event mime_begin_entity(c: connection) &priority=10
{ {
if ( ! c?$smtp ) return; c$smtp$entity = Entity();
++c$smtp_state$mime_depth;
set_session(c, T);
} }
event file_new(f: fa_file) &priority=5 event file_over_new_connection(f: fa_file, c: connection, is_orig: bool) &priority=5
{ {
if ( ! f?$source ) return; if ( f$source == "SMTP" && c?$smtp )
if ( f$source != "SMTP" ) return;
if ( ! f?$conns ) return;
local fname: string;
local extracting: bool = F;
for ( cid in f$conns )
{ {
local c: connection = f$conns[cid]; if ( c$smtp?$entity && c$smtp$entity?$filename )
f$info$filename = c$smtp$entity$filename;
if ( ! c?$smtp ) next; f$info$depth = c$smtp_state$mime_depth;
if ( ! c$smtp?$current_entity ) next;
if ( c$smtp$current_entity$extract_file )
{
if ( ! extracting )
{
fname = get_extraction_name(f);
FileAnalysis::add_analyzer(f,
[$tag=FileAnalysis::ANALYZER_EXTRACT,
$extract_filename=fname]);
extracting = T;
}
c$smtp$current_entity$extraction_file = fname;
}
if ( c$smtp$current_entity$calc_md5 )
FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_MD5]);
} }
} }
function check_extract_by_type(f: fa_file) event mime_one_header(c: connection, h: mime_header_rec) &priority=5
{ {
if ( extract_file_types !in f$mime_type ) return; if ( ! c?$smtp )
if ( f?$info && FileAnalysis::ANALYZER_EXTRACT in f$info$analyzers )
return; return;
local fname: string = get_extraction_name(f);
FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_EXTRACT,
$extract_filename=fname]);
if ( ! f?$conns ) return;
for ( cid in f$conns )
{
local c: connection = f$conns[cid];
if ( ! c?$smtp ) next;
c$smtp$current_entity$extraction_file = fname;
}
}
# Attaches the MD5 analyzer to the file when its mime type matches
# generate_md5, unless MD5 calculation is disabled via never_calc_md5.
function check_md5_by_type(f: fa_file)
	{
	if ( never_calc_md5 || generate_md5 !in f$mime_type )
		return;

	FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_MD5]);
	}
# For SMTP files with a known mime type: copy the type into the current
# entity of every associated SMTP connection, then apply the mime-type
# based extraction and MD5 policies.
event file_new(f: fa_file) &priority=5
	{
	if ( ! f?$source || f$source != "SMTP" || ! f?$mime_type )
		return;

	if ( f?$conns )
		{
		for ( cid in f$conns )
			{
			local sess: connection = f$conns[cid];

			if ( sess?$smtp && sess$smtp?$current_entity )
				sess$smtp$current_entity$mime_type = f$mime_type;
			}
		}

	check_extract_by_type(f);
	check_md5_by_type(f);
	}
# When an SMTP file goes away, finalize and log the entity record of the
# first associated connection that has one, provided any content was seen.
event file_state_remove(f: fa_file) &priority=4
	{
	if ( ! f?$source || f$source != "SMTP" || ! f?$conns )
		return;

	for ( cid in f$conns )
		{
		local sess: connection = f$conns[cid];

		# Skip connections without an in-progress entity, and only log
		# if there was some content.
		if ( ! sess?$smtp || ! sess$smtp?$current_entity || f$seen_bytes == 0 )
			next;

		if ( f?$info && f$info?$md5 )
			sess$smtp$current_entity$md5 = f$info$md5;

		sess$smtp$current_entity$content_len = f$seen_bytes;
		Log::write(SMTP::ENTITIES_LOG, sess$smtp$current_entity);
		delete sess$smtp$current_entity;
		return;
		}
	}
event mime_one_header(c: connection, h: mime_header_rec)
{
if ( ! c?$smtp ) return;
if ( h$name == "CONTENT-DISPOSITION" && if ( h$name == "CONTENT-DISPOSITION" &&
/[fF][iI][lL][eE][nN][aA][mM][eE]/ in h$value ) /[fF][iI][lL][eE][nN][aA][mM][eE]/ in h$value )
c$smtp$current_entity$filename = extract_filename_from_content_disposition(h$value); c$smtp$entity$filename = extract_filename_from_content_disposition(h$value);
if ( h$name == "CONTENT-TYPE" && if ( h$name == "CONTENT-TYPE" &&
/[nN][aA][mM][eE][:blank:]*=/ in h$value ) /[nN][aA][mM][eE][:blank:]*=/ in h$value )
c$smtp$current_entity$filename = extract_filename_from_content_disposition(h$value); c$smtp$entity$filename = extract_filename_from_content_disposition(h$value);
}
# Drop the per-entity state once the MIME entity has ended.
event mime_end_entity(c: connection) &priority=5
	{
	if ( ! c?$smtp || ! c$smtp?$entity )
		return;

	delete c$smtp$entity;
	}

View file

@ -1,27 +0,0 @@
@load ./main
@load ./entities
@load base/utils/conn-ids
@load base/frameworks/file-analysis/main
module SMTP;
export {
## Default file handle provider for SMTP.
##
## c: The connection the file data is being transferred over.
##
## is_orig: Whether the data was sent by the connection originator.
##
## Returns: A string handle for the file, or an empty string if the
##          connection has no SMTP state attached.
global get_file_handle: function(c: connection, is_orig: bool): string;
}
# Builds the file handle for an SMTP MIME entity from the analyzer tag,
# the connection start time, the transaction depth and the mime level.
# Returns "" when the connection has no SMTP state.
function get_file_handle(c: connection, is_orig: bool): string
	{
	if ( c?$smtp )
		return cat(Analyzer::ANALYZER_SMTP, " ", c$start_time, " ", c$smtp$trans_depth, " ",
		           c$smtp_state$mime_level);

	return "";
	}
module GLOBAL;
# Answers file handle requests for the SMTP analyzer; requests for any
# other analyzer tag are left untouched.
event get_file_handle(tag: Analyzer::Tag, c: connection, is_orig: bool) &priority=5
	{
	if ( tag == Analyzer::ANALYZER_SMTP )
		set_file_handle(SMTP::get_file_handle(c, is_orig));
	}

Some files were not shown because too many files have changed in this diff Show more