Merge remote-tracking branch 'origin/master' into topic/bernhard/thread-cleanup

Conflicts:
	src/main.cc
This commit is contained in:
Bernhard Amann 2013-05-15 16:00:49 -07:00
commit f389cafc3b
262 changed files with 88998 additions and 160161 deletions

View file

@ -1,4 +1,12 @@
2.1-576 | 2013-05-15 14:29:09 -0700
* Initial version of new file analysis framework. This moves most of
the processing of file content from script-land into the core,
where it belongs. Much of this is an internal change, and at this
point the new code has essentially feature-equality with the old
one. More script-level changes to come. (Jon Siwek)
2.1-502 | 2013-05-10 19:29:37 -0700 2.1-502 | 2013-05-10 19:29:37 -0700
* Allow default function/hook/event parameters. Addresses #972. (Jon * Allow default function/hook/event parameters. Addresses #972. (Jon

View file

@ -17,12 +17,17 @@ set(BRO_SCRIPT_SOURCE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/scripts)
get_filename_component(BRO_SCRIPT_INSTALL_PATH ${BRO_SCRIPT_INSTALL_PATH} get_filename_component(BRO_SCRIPT_INSTALL_PATH ${BRO_SCRIPT_INSTALL_PATH}
ABSOLUTE) ABSOLUTE)
set(BRO_MAGIC_INSTALL_PATH ${BRO_ROOT_DIR}/share/bro/magic)
set(BRO_MAGIC_SOURCE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/magic)
configure_file(bro-path-dev.in ${CMAKE_CURRENT_BINARY_DIR}/bro-path-dev) configure_file(bro-path-dev.in ${CMAKE_CURRENT_BINARY_DIR}/bro-path-dev)
file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/bro-path-dev.sh file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/bro-path-dev.sh
"export BROPATH=`${CMAKE_CURRENT_BINARY_DIR}/bro-path-dev`\n" "export BROPATH=`${CMAKE_CURRENT_BINARY_DIR}/bro-path-dev`\n"
"export BROMAGIC=\"${BRO_MAGIC_SOURCE_PATH}\"\n"
"export PATH=\"${CMAKE_CURRENT_BINARY_DIR}/src\":$PATH\n") "export PATH=\"${CMAKE_CURRENT_BINARY_DIR}/src\":$PATH\n")
file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/bro-path-dev.csh file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/bro-path-dev.csh
"setenv BROPATH `${CMAKE_CURRENT_BINARY_DIR}/bro-path-dev`\n" "setenv BROPATH `${CMAKE_CURRENT_BINARY_DIR}/bro-path-dev`\n"
"setenv BROMAGIC \"${BRO_MAGIC_SOURCE_PATH}\"\n"
"setenv PATH \"${CMAKE_CURRENT_BINARY_DIR}/src\":$PATH\n") "setenv PATH \"${CMAKE_CURRENT_BINARY_DIR}/src\":$PATH\n")
file(STRINGS "${CMAKE_CURRENT_SOURCE_DIR}/VERSION" VERSION LIMIT_COUNT 1) file(STRINGS "${CMAKE_CURRENT_SOURCE_DIR}/VERSION" VERSION LIMIT_COUNT 1)
@ -69,6 +74,12 @@ if (MISSING_PREREQS)
message(FATAL_ERROR "Configuration aborted due to missing prerequisites") message(FATAL_ERROR "Configuration aborted due to missing prerequisites")
endif () endif ()
# Minimum libmagic version accepted by this build. Configuration is aborted
# with a fatal error when the detected LibMagic_VERSION compares lower.
# NOTE(review): LibMagic_VERSION is presumably set by the libmagic find
# module run earlier in this file -- confirm it is always defined here.
set(libmagic_req 5.04)
if ( LibMagic_VERSION VERSION_LESS ${libmagic_req} )
message(FATAL_ERROR "libmagic of at least version ${libmagic_req} required "
"(found ${LibMagic_VERSION})")
endif ()
include_directories(BEFORE include_directories(BEFORE
${PCAP_INCLUDE_DIR} ${PCAP_INCLUDE_DIR}
${OpenSSL_INCLUDE_DIR} ${OpenSSL_INCLUDE_DIR}
@ -190,6 +201,11 @@ CheckOptionalBuildSources(aux/broctl Broctl INSTALL_BROCTL)
CheckOptionalBuildSources(aux/bro-aux Bro-Aux INSTALL_AUX_TOOLS) CheckOptionalBuildSources(aux/bro-aux Bro-Aux INSTALL_AUX_TOOLS)
CheckOptionalBuildSources(aux/broccoli Broccoli INSTALL_BROCCOLI) CheckOptionalBuildSources(aux/broccoli Broccoli INSTALL_BROCCOLI)
# Install the contents of the magic/ directory into BRO_MAGIC_INSTALL_PATH.
# The COPYING file is excluded; every other file (PATTERN "*") is installed.
install(DIRECTORY ./magic/ DESTINATION ${BRO_MAGIC_INSTALL_PATH} FILES_MATCHING
PATTERN "COPYING" EXCLUDE
PATTERN "*"
)
######################################################################## ########################################################################
## Packaging Setup ## Packaging Setup

30
NEWS
View file

@ -55,6 +55,36 @@ New Functionality
global foo: function(s: string, t: string &default="abc", u: count &default=0); global foo: function(s: string, t: string &default="abc", u: count &default=0);
- The new file analysis framework moves most of the processing of file
content from script-land into the core, where it belongs. Much of
this is an internal change, but the framework comes with the following
user-visible functionality (some of that was already available
before, but done differently):
[TODO: This will probably change with further script updates.]
- A binary input reader interfaces the input framework with file
analysis, allowing files on disk to be injected into Bro's
processing.
- Support for analyzing data transferred via HTTP range
requests.
- HTTP:
* Identify MIME type of message.
* Extract message to disk.
* Compute MD5 for messages.
- SMTP:
* Identify MIME type of message.
* Extract message to disk.
* Compute MD5 for messages.
* Provide access to start of entity data.
- FTP data transfers: Identify MIME type; record to disk.
- IRC DCC transfers: Record to disk.
Changed Functionality Changed Functionality
~~~~~~~~~~~~~~~~~~~~~ ~~~~~~~~~~~~~~~~~~~~~

View file

@ -1 +1 @@
2.1-502 2.1-576

2
cmake

@ -1 +1 @@
Subproject commit 94e72a3075bb0b9550ad05758963afda394bfb2c Subproject commit e1a7fd00a0a66d6831a239fe84f5fcfaa54e2c35

View file

@ -19,6 +19,7 @@ rest_target(${psd} base/init-bare.bro internal)
rest_target(${CMAKE_BINARY_DIR}/src base/bro.bif.bro) rest_target(${CMAKE_BINARY_DIR}/src base/bro.bif.bro)
rest_target(${CMAKE_BINARY_DIR}/src base/const.bif.bro) rest_target(${CMAKE_BINARY_DIR}/src base/const.bif.bro)
rest_target(${CMAKE_BINARY_DIR}/src base/event.bif.bro) rest_target(${CMAKE_BINARY_DIR}/src base/event.bif.bro)
rest_target(${CMAKE_BINARY_DIR}/src base/file_analysis.bif.bro)
rest_target(${CMAKE_BINARY_DIR}/src base/input.bif.bro) rest_target(${CMAKE_BINARY_DIR}/src base/input.bif.bro)
rest_target(${CMAKE_BINARY_DIR}/src base/logging.bif.bro) rest_target(${CMAKE_BINARY_DIR}/src base/logging.bif.bro)
rest_target(${CMAKE_BINARY_DIR}/src base/reporter.bif.bro) rest_target(${CMAKE_BINARY_DIR}/src base/reporter.bif.bro)
@ -32,9 +33,11 @@ rest_target(${psd} base/frameworks/cluster/setup-connections.bro)
rest_target(${psd} base/frameworks/communication/main.bro) rest_target(${psd} base/frameworks/communication/main.bro)
rest_target(${psd} base/frameworks/control/main.bro) rest_target(${psd} base/frameworks/control/main.bro)
rest_target(${psd} base/frameworks/dpd/main.bro) rest_target(${psd} base/frameworks/dpd/main.bro)
rest_target(${psd} base/frameworks/file-analysis/main.bro)
rest_target(${psd} base/frameworks/input/main.bro) rest_target(${psd} base/frameworks/input/main.bro)
rest_target(${psd} base/frameworks/input/readers/ascii.bro) rest_target(${psd} base/frameworks/input/readers/ascii.bro)
rest_target(${psd} base/frameworks/input/readers/benchmark.bro) rest_target(${psd} base/frameworks/input/readers/benchmark.bro)
rest_target(${psd} base/frameworks/input/readers/binary.bro)
rest_target(${psd} base/frameworks/input/readers/raw.bro) rest_target(${psd} base/frameworks/input/readers/raw.bro)
rest_target(${psd} base/frameworks/intel/cluster.bro) rest_target(${psd} base/frameworks/intel/cluster.bro)
rest_target(${psd} base/frameworks/intel/input.bro) rest_target(${psd} base/frameworks/intel/input.bro)
@ -80,21 +83,25 @@ rest_target(${psd} base/protocols/conn/main.bro)
rest_target(${psd} base/protocols/conn/polling.bro) rest_target(${psd} base/protocols/conn/polling.bro)
rest_target(${psd} base/protocols/dns/consts.bro) rest_target(${psd} base/protocols/dns/consts.bro)
rest_target(${psd} base/protocols/dns/main.bro) rest_target(${psd} base/protocols/dns/main.bro)
rest_target(${psd} base/protocols/ftp/file-analysis.bro)
rest_target(${psd} base/protocols/ftp/file-extract.bro) rest_target(${psd} base/protocols/ftp/file-extract.bro)
rest_target(${psd} base/protocols/ftp/gridftp.bro) rest_target(${psd} base/protocols/ftp/gridftp.bro)
rest_target(${psd} base/protocols/ftp/main.bro) rest_target(${psd} base/protocols/ftp/main.bro)
rest_target(${psd} base/protocols/ftp/utils-commands.bro) rest_target(${psd} base/protocols/ftp/utils-commands.bro)
rest_target(${psd} base/protocols/http/file-analysis.bro)
rest_target(${psd} base/protocols/http/file-extract.bro) rest_target(${psd} base/protocols/http/file-extract.bro)
rest_target(${psd} base/protocols/http/file-hash.bro) rest_target(${psd} base/protocols/http/file-hash.bro)
rest_target(${psd} base/protocols/http/file-ident.bro) rest_target(${psd} base/protocols/http/file-ident.bro)
rest_target(${psd} base/protocols/http/main.bro) rest_target(${psd} base/protocols/http/main.bro)
rest_target(${psd} base/protocols/http/utils.bro) rest_target(${psd} base/protocols/http/utils.bro)
rest_target(${psd} base/protocols/irc/dcc-send.bro) rest_target(${psd} base/protocols/irc/dcc-send.bro)
rest_target(${psd} base/protocols/irc/file-analysis.bro)
rest_target(${psd} base/protocols/irc/main.bro) rest_target(${psd} base/protocols/irc/main.bro)
rest_target(${psd} base/protocols/modbus/consts.bro) rest_target(${psd} base/protocols/modbus/consts.bro)
rest_target(${psd} base/protocols/modbus/main.bro) rest_target(${psd} base/protocols/modbus/main.bro)
rest_target(${psd} base/protocols/smtp/entities-excerpt.bro) rest_target(${psd} base/protocols/smtp/entities-excerpt.bro)
rest_target(${psd} base/protocols/smtp/entities.bro) rest_target(${psd} base/protocols/smtp/entities.bro)
rest_target(${psd} base/protocols/smtp/file-analysis.bro)
rest_target(${psd} base/protocols/smtp/main.bro) rest_target(${psd} base/protocols/smtp/main.bro)
rest_target(${psd} base/protocols/socks/consts.bro) rest_target(${psd} base/protocols/socks/consts.bro)
rest_target(${psd} base/protocols/socks/main.bro) rest_target(${psd} base/protocols/socks/main.bro)

29
magic/COPYING Normal file
View file

@ -0,0 +1,29 @@
# $File: LEGAL.NOTICE,v 1.15 2006/05/03 18:48:33 christos Exp $
# Copyright (c) Ian F. Darwin 1986, 1987, 1989, 1990, 1991, 1992, 1994, 1995.
# Software written by Ian F. Darwin and others;
# maintained 1994- Christos Zoulas.
#
# This software is not subject to any export provision of the United States
# Department of Commerce, and may be exported to any country or planet.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# 1. Redistributions of source code must retain the above copyright
# notice immediately at the beginning of the file, without modification,
# this list of conditions, and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
# SUCH DAMAGE.

208
magic/animation Normal file
View file

@ -0,0 +1,208 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File: animation,v 1.47 2013/02/06 14:18:52 christos Exp $
# animation: file(1) magic for animation/movie formats
#
# animation formats
# MPEG, FLI, DL originally from vax@ccwf.cc.utexas.edu (VaX#n8)
# FLC, SGI, Apple originally from Daniel Quinlan (quinlan@yggdrasil.com)
# SGI and Apple formats
0 string MOVI Silicon Graphics movie file
!:mime video/x-sgi-movie
4 string moov Apple QuickTime
!:mime video/quicktime
4 string mdat Apple QuickTime movie (unoptimized)
!:mime video/quicktime
#4 string wide Apple QuickTime movie (unoptimized)
#!:mime video/quicktime
#4 string skip Apple QuickTime movie (modified)
#!:mime video/quicktime
#4 string free Apple QuickTime movie (modified)
#!:mime video/quicktime
4 string idsc Apple QuickTime image (fast start)
!:mime image/x-quicktime
#4 string idat Apple QuickTime image (unoptimized)
#!:mime image/x-quicktime
4 string pckg Apple QuickTime compressed archive
!:mime application/x-quicktime-player
4 string/W jP JPEG 2000 image
!:mime image/jp2
4 string ftyp ISO Media
>8 string isom \b, MPEG v4 system, version 1
!:mime video/mp4
>8 string mp41 \b, MPEG v4 system, version 1
!:mime video/mp4
>8 string mp42 \b, MPEG v4 system, version 2
!:mime video/mp4
>8 string/W jp2 \b, JPEG 2000
!:mime image/jp2
>8 string 3ge \b, MPEG v4 system, 3GPP
!:mime video/3gpp
>8 string 3gg \b, MPEG v4 system, 3GPP
!:mime video/3gpp
>8 string 3gp \b, MPEG v4 system, 3GPP
!:mime video/3gpp
>8 string 3gs \b, MPEG v4 system, 3GPP
!:mime video/3gpp
>8 string 3g2 \b, MPEG v4 system, 3GPP2
!:mime video/3gpp2
>8 string mmp4 \b, MPEG v4 system, 3GPP Mobile
!:mime video/mp4
>8 string avc1 \b, MPEG v4 system, 3GPP JVT AVC
!:mime video/3gpp
>8 string/W M4A \b, MPEG v4 system, iTunes AAC-LC
!:mime audio/mp4
>8 string/W M4V \b, MPEG v4 system, iTunes AVC-LC
!:mime video/mp4
>8 string/W qt \b, Apple QuickTime movie
!:mime video/quicktime
# MPEG sequences
# Scans for all common MPEG header start codes
0 belong&0xFFFFFF00 0x00000100
>3 byte 0xBA MPEG sequence
!:mime video/mpeg
# GRR too general as it catches also FoxPro Memo example NG.FPT
>3 byte 0xB0 MPEG sequence, v4
!:mime video/mpeg4-generic
>3 byte 0xB5 MPEG sequence, v4
!:mime video/mpeg4-generic
>3 byte 0xB3 MPEG sequence
!:mime video/mpeg
# MPEG ADTS Audio (*.mpx/mxa/aac)
# from dreesen@math.fu-berlin.de
# modified to fully support MPEG ADTS
# MP3, M1A
# modified by Joerg Jenderek
# GRR the original test are too common for many DOS files
# so don't accept as MP3 until we've tested the rate
0 beshort&0xFFFE 0xFFFA
# rates
>2 byte&0xF0 0x10 MPEG ADTS, layer III, v1, 32 kbps
!:mime audio/mpeg
>2 byte&0xF0 0x20 MPEG ADTS, layer III, v1, 40 kbps
!:mime audio/mpeg
>2 byte&0xF0 0x30 MPEG ADTS, layer III, v1, 48 kbps
!:mime audio/mpeg
>2 byte&0xF0 0x40 MPEG ADTS, layer III, v1, 56 kbps
!:mime audio/mpeg
>2 byte&0xF0 0x50 MPEG ADTS, layer III, v1, 64 kbps
!:mime audio/mpeg
>2 byte&0xF0 0x60 MPEG ADTS, layer III, v1, 80 kbps
!:mime audio/mpeg
>2 byte&0xF0 0x70 MPEG ADTS, layer III, v1, 96 kbps
!:mime audio/mpeg
>2 byte&0xF0 0x80 MPEG ADTS, layer III, v1, 112 kbps
!:mime audio/mpeg
>2 byte&0xF0 0x90 MPEG ADTS, layer III, v1, 128 kbps
!:mime audio/mpeg
>2 byte&0xF0 0xA0 MPEG ADTS, layer III, v1, 160 kbps
!:mime audio/mpeg
>2 byte&0xF0 0xB0 MPEG ADTS, layer III, v1, 192 kbps
!:mime audio/mpeg
>2 byte&0xF0 0xC0 MPEG ADTS, layer III, v1, 224 kbps
!:mime audio/mpeg
>2 byte&0xF0 0xD0 MPEG ADTS, layer III, v1, 256 kbps
!:mime audio/mpeg
>2 byte&0xF0 0xE0 MPEG ADTS, layer III, v1, 320 kbps
!:mime audio/mpeg
# MP2, M1A
0 beshort&0xFFFE 0xFFFC MPEG ADTS, layer II, v1
!:mime audio/mpeg
# MP3, M2A
0 beshort&0xFFFE 0xFFF2 MPEG ADTS, layer III, v2
!:mime audio/mpeg
# MPA, M2A
0 beshort&0xFFFE 0xFFF6 MPEG ADTS, layer I, v2
!:mime audio/mpeg
# MP3, M25A
0 beshort&0xFFFE 0xFFE2 MPEG ADTS, layer III, v2.5
!:mime audio/mpeg
# Stored AAC streams (instead of the MP4 format)
0 string ADIF MPEG ADIF, AAC
!:mime audio/x-hx-aac-adif
# Live or stored single AAC stream (used with MPEG-2 systems)
0 beshort&0xFFF6 0xFFF0 MPEG ADTS, AAC
!:mime audio/x-hx-aac-adts
# Live MPEG-4 audio streams (instead of RTP FlexMux)
0 beshort&0xFFE0 0x56E0 MPEG-4 LOAS
!:mime audio/x-mp4a-latm
# This magic isn't strong enough (matches plausible ISO-8859-1 text)
#0 beshort 0x4DE1 MPEG-4 LO-EP audio stream
#!:mime audio/x-mp4a-latm
# Summary: FLI animation format
# Created by: Daniel Quinlan <quinlan@yggdrasil.com>
# Modified by (1): Abel Cheung <abelcheung@gmail.com> (avoid over-generic detection)
4 leshort 0xAF11
# standard FLI always has 320x200 resolution and 8 bit color
>8 leshort 320
>>10 leshort 200
>>>12 leshort 8 FLI animation, 320x200x8
!:mime video/x-fli
# Summary: FLC animation format
# Created by: Daniel Quinlan <quinlan@yggdrasil.com>
# Modified by (1): Abel Cheung <abelcheung@gmail.com> (avoid over-generic detection)
4 leshort 0xAF12
# standard FLC always use 8 bit color
>12 leshort 8 FLC animation
!:mime video/x-flc
# Microsoft Advanced Streaming Format (ASF) <mpruett@sgi.com>
0 belong 0x3026b275 Microsoft ASF
!:mime video/x-ms-asf
# MNG Video Format, <URL:http://www.libpng.org/pub/mng/spec/>
0 string \x8aMNG MNG video data,
!:mime video/x-mng
# JNG Video Format, <URL:http://www.libpng.org/pub/mng/spec/>
0 string \x8bJNG JNG video data,
!:mime video/x-jng
# VRML (Virtual Reality Modelling Language)
0 string/w #VRML\ V1.0\ ascii VRML 1 file
!:mime model/vrml
0 string/w #VRML\ V2.0\ utf8 ISO/IEC 14772 VRML 97 file
!:mime model/vrml
# X3D (Extensible 3D) [http://www.web3d.org/specifications/x3d-3.0.dtd]
# From Michel Briand <michelbriand@free.fr>
0 string/t \<?xml\ version="
!:strength +1
>20 search/1000/cw \<!DOCTYPE\ X3D X3D (Extensible 3D) model xml text
!:mime model/x3d
# MPEG file
# MPEG sequences
# FIXME: This section is from the old magic.mime file and needs integrating with the rest
0 belong 0x000001BA
>4 byte &0x40
!:mime video/mp2p
>4 byte ^0x40
!:mime video/mpeg
0 belong 0x000001BB
!:mime video/mpeg
0 belong 0x000001B0
!:mime video/mp4v-es
0 belong 0x000001B5
!:mime video/mp4v-es
0 belong 0x000001B3
!:mime video/mpv
0 belong&0xFF5FFF1F 0x47400010
!:mime video/mp2t
0 belong 0x00000001
>4 byte&0x1F 0x07
!:mime video/h264

242
magic/archive Normal file
View file

@ -0,0 +1,242 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File: archive,v 1.78 2013/02/06 14:18:52 christos Exp $
# archive: file(1) magic for archive formats (see also "msdos" for self-
# extracting compressed archives)
#
# cpio, ar, arc, arj, hpack, lha/lharc, rar, squish, uc2, zip, zoo, etc.
# pre-POSIX "tar" archives are handled in the C code.
# POSIX tar archives
257 string ustar\0 POSIX tar archive
!:mime application/x-tar # encoding: posix
257 string ustar\040\040\0 GNU tar archive
!:mime application/x-tar # encoding: gnu
# cpio archives
#
# Yes, the top two "cpio archive" formats *are* supposed to just be "short".
# The idea is to indicate archives produced on machines with the same
# byte order as the machine running "file" with "cpio archive", and
# to indicate archives produced on machines with the opposite byte order
# from the machine running "file" with "byte-swapped cpio archive".
#
# The SVR4 "cpio(4)" hints that there are additional formats, but they
# are defined as "short"s; I think all the new formats are
# character-header formats and thus are strings, not numbers.
0 short 070707 cpio archive
!:mime application/x-cpio
0 short 0143561 byte-swapped cpio archive
!:mime application/x-cpio # encoding: swapped
#
# System V Release 1 portable(?) archive format.
#
0 string =<ar> System V Release 1 ar archive
!:mime application/x-archive
#
# Debian package; it's in the portable archive format, and needs to go
# before the entry for regular portable archives, as it's recognized as
# a portable archive whose first member has a name beginning with
# "debian".
#
0 string =!<arch>\ndebian
!:mime application/x-debian-package
#
# MIPS archive; they're in the portable archive format, and need to go
# before the entry for regular portable archives, as it's recognized as
# a portable archive whose first member has a name beginning with
# "__________E".
#
0 string =!<arch>\n__________E MIPS archive
!:mime application/x-archive
#
# BSD/SVR2-and-later portable archive formats.
#
0 string =!<arch> current ar archive
!:mime application/x-archive
# ARC archiver, from Daniel Quinlan (quinlan@yggdrasil.com)
#
# The first byte is the magic (0x1a), byte 2 is the compression type for
# the first file (0x01 through 0x09), and bytes 3 to 15 are the MS-DOS
# filename of the first file (null terminated). Since some types collide
# we only test some types on basis of frequency: 0x08 (83%), 0x09 (5%),
# 0x02 (5%), 0x03 (3%), 0x04 (2%), 0x06 (2%). 0x01 collides with terminfo.
0 lelong&0x8080ffff 0x0000081a ARC archive data, dynamic LZW
!:mime application/x-arc
0 lelong&0x8080ffff 0x0000091a ARC archive data, squashed
!:mime application/x-arc
0 lelong&0x8080ffff 0x0000021a ARC archive data, uncompressed
!:mime application/x-arc
0 lelong&0x8080ffff 0x0000031a ARC archive data, packed
!:mime application/x-arc
0 lelong&0x8080ffff 0x0000041a ARC archive data, squeezed
!:mime application/x-arc
0 lelong&0x8080ffff 0x0000061a ARC archive data, crunched
!:mime application/x-arc
# [JW] stuff taken from idarc, obviously ARC successors:
0 lelong&0x8080ffff 0x00000a1a PAK archive data
!:mime application/x-arc
0 lelong&0x8080ffff 0x0000141a ARC+ archive data
!:mime application/x-arc
0 lelong&0x8080ffff 0x0000481a HYP archive data
!:mime application/x-arc
# ARJ archiver (jason@jarthur.Claremont.EDU)
0 leshort 0xea60 ARJ archive data
!:mime application/x-arj
# LHARC/LHA archiver (Greg Roelofs, newt@uchicago.edu)
2 string -lh0- LHarc 1.x/ARX archive data [lh0]
!:mime application/x-lharc
2 string -lh1- LHarc 1.x/ARX archive data [lh1]
!:mime application/x-lharc
2 string -lz4- LHarc 1.x archive data [lz4]
!:mime application/x-lharc
2 string -lz5- LHarc 1.x archive data [lz5]
!:mime application/x-lharc
# [never seen any but the last; -lh4- reported in comp.compression:]
2 string -lzs- LHa/LZS archive data [lzs]
!:mime application/x-lha
2 string -lh\40- LHa 2.x? archive data [lh ]
!:mime application/x-lha
2 string -lhd- LHa 2.x? archive data [lhd]
!:mime application/x-lha
2 string -lh2- LHa 2.x? archive data [lh2]
!:mime application/x-lha
2 string -lh3- LHa 2.x? archive data [lh3]
!:mime application/x-lha
2 string -lh4- LHa (2.x) archive data [lh4]
!:mime application/x-lha
2 string -lh5- LHa (2.x) archive data [lh5]
!:mime application/x-lha
2 string -lh6- LHa (2.x) archive data [lh6]
!:mime application/x-lha
2 string -lh7- LHa (2.x)/LHark archive data [lh7]
!:mime application/x-lha
# RAR archiver (Greg Roelofs, newt@uchicago.edu)
0 string Rar! RAR archive data,
!:mime application/x-rar
# PKZIP multi-volume archive
0 string PK\x07\x08PK\x03\x04 Zip multi-volume archive data, at least PKZIP v2.50 to extract
!:mime application/zip
# Zip archives (Greg Roelofs, c/o zip-bugs@wkuvx1.wku.edu)
0 string PK\003\004
# Specialised zip formats which start with a member named 'mimetype'
# (stored uncompressed, with no 'extra field') containing the file's MIME type.
# Check for have 8-byte name, 0-byte extra field, name "mimetype", and
# contents starting with "application/":
>26 string \x8\0\0\0mimetypeapplication/
# OpenDocument formats (for OpenOffice 2.x / StarOffice >= 8)
# http://lists.oasis-open.org/archives/office/200505/msg00006.html
# (mimetype contains "application/vnd.oasis.opendocument.<SUBTYPE>")
>>50 string vnd.oasis.opendocument. OpenDocument
>>>73 string text
>>>>77 byte !0x2d Text
!:mime application/vnd.oasis.opendocument.text
>>>>77 string -template Text Template
!:mime application/vnd.oasis.opendocument.text-template
>>>>77 string -web HTML Document Template
!:mime application/vnd.oasis.opendocument.text-web
>>>>77 string -master Master Document
!:mime application/vnd.oasis.opendocument.text-master
>>>73 string graphics
>>>>81 byte !0x2d Drawing
!:mime application/vnd.oasis.opendocument.graphics
>>>>81 string -template Template
!:mime application/vnd.oasis.opendocument.graphics-template
>>>73 string presentation
>>>>85 byte !0x2d Presentation
!:mime application/vnd.oasis.opendocument.presentation
>>>>85 string -template Template
!:mime application/vnd.oasis.opendocument.presentation-template
>>>73 string spreadsheet
>>>>84 byte !0x2d Spreadsheet
!:mime application/vnd.oasis.opendocument.spreadsheet
>>>>84 string -template Template
!:mime application/vnd.oasis.opendocument.spreadsheet-template
>>>73 string chart
>>>>78 byte !0x2d Chart
!:mime application/vnd.oasis.opendocument.chart
>>>>78 string -template Template
!:mime application/vnd.oasis.opendocument.chart-template
>>>73 string formula
>>>>80 byte !0x2d Formula
!:mime application/vnd.oasis.opendocument.formula
>>>>80 string -template Template
!:mime application/vnd.oasis.opendocument.formula-template
>>>73 string database Database
!:mime application/vnd.oasis.opendocument.database
>>>73 string image
>>>>78 byte !0x2d Image
!:mime application/vnd.oasis.opendocument.image
>>>>78 string -template Template
!:mime application/vnd.oasis.opendocument.image-template
# EPUB (OEBPS) books using OCF (OEBPS Container Format)
# http://www.idpf.org/ocf/ocf1.0/download/ocf10.htm, section 4.
# From: Ralf Brown <ralf.brown@gmail.com>
>0x1E string mimetypeapplication/epub+zip EPUB document
!:mime application/epub+zip
# Catch other ZIP-with-mimetype formats
# In a ZIP file, the bytes immediately after a member's contents are
# always "PK". The 2 regex rules here print the "mimetype" member's
# contents up to the first 'P'. Luckily, most MIME types don't contain
# any capital 'P's. This is a kludge.
# (mimetype contains "application/<OTHER>")
>>50 string !epub+zip
>>>50 string !vnd.oasis.opendocument.
>>>>50 string !vnd.sun.xml.
>>>>>50 string !vnd.kde.
>>>>>>38 regex [!-OQ-~]+ Zip data (MIME type "%s"?)
!:mime application/zip
# (mimetype contents other than "application/*")
>26 string \x8\0\0\0mimetype
>>38 string !application/
>>>38 regex [!-OQ-~]+ Zip data (MIME type "%s"?)
!:mime application/zip
# Java Jar files
>(26.s+30) leshort 0xcafe Java Jar file data (zip)
!:mime application/jar
# Generic zip archives (Greg Roelofs, c/o zip-bugs@wkuvx1.wku.edu)
# Next line excludes specialized formats:
>(26.s+30) leshort !0xcafe
>>26 string !\x8\0\0\0mimetype Zip archive data
!:mime application/zip
# Zoo archiver
20 lelong 0xfdc4a7dc Zoo archive data
!:mime application/x-zoo
# Shell archives
10 string #\ This\ is\ a\ shell\ archive shell archive text
!:mime application/octet-stream
# Felix von Leitner <felix-file@fefe.de>
0 string d8:announce BitTorrent file
!:mime application/x-bittorrent
# EET archive
# From: Tilman Sauerbeck <tilman@code-monkey.de>
0 belong 0x1ee7ff00 EET archive
!:mime application/x-eet
# Symbian installation files
# http://www.thouky.co.uk/software/psifs/sis.html
# http://developer.symbian.com/main/downloads/papers/SymbianOSv91/softwareinstallsis.pdf
8 lelong 0x10000419 Symbian installation file
!:mime application/vnd.symbian.install
0 lelong 0x10201A7A Symbian installation file (Symbian OS 9.x)
!:mime x-epoc/x-sisx-app

19
magic/assembler Normal file
View file

@ -0,0 +1,19 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File: assembler,v 1.3 2013/01/04 17:23:28 christos Exp $
# make: file(1) magic for assembler source
#
# Match common assembler directives at the start of a line, optionally
# preceded by blanks or tabs. \040 is the octal escape for a space; the
# previous \020 was the DLE control character, so directives indented with
# spaces were never recognized.
0 regex \^[\040\t]*\\.asciiz assembler source text
!:mime text/x-asm
0 regex \^[\040\t]*\\.byte assembler source text
!:mime text/x-asm
0 regex \^[\040\t]*\\.even assembler source text
!:mime text/x-asm
0 regex \^[\040\t]*\\.globl assembler source text
!:mime text/x-asm
0 regex \^[\040\t]*\\.text assembler source text
!:mime text/x-asm
0 regex \^[\040\t]*\\.file assembler source text
!:mime text/x-asm
0 regex \^[\040\t]*\\.type assembler source text
!:mime text/x-asm

149
magic/audio Normal file
View file

@ -0,0 +1,149 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File: audio,v 1.65 2012/10/31 13:38:40 christos Exp $
# audio: file(1) magic for sound formats (see also "iff")
#
# Jan Nicolai Langfeldt (janl@ifi.uio.no), Dan Quinlan (quinlan@yggdrasil.com),
# and others
#
# Sun/NeXT audio data
0 string .snd Sun/NeXT audio data:
>12 belong 1 8-bit ISDN mu-law,
!:mime audio/basic
>12 belong 2 8-bit linear PCM [REF-PCM],
!:mime audio/basic
>12 belong 3 16-bit linear PCM,
!:mime audio/basic
>12 belong 4 24-bit linear PCM,
!:mime audio/basic
>12 belong 5 32-bit linear PCM,
!:mime audio/basic
>12 belong 6 32-bit IEEE floating point,
!:mime audio/basic
>12 belong 7 64-bit IEEE floating point,
!:mime audio/basic
>12 belong 23 8-bit ISDN mu-law compressed (CCITT G.721 ADPCM voice enc.),
!:mime audio/x-adpcm
# DEC systems (e.g. DECstation 5000) use a variant of the Sun/NeXT format
# that uses little-endian encoding and has a different magic number
0 lelong 0x0064732E DEC audio data:
>12 lelong 1 8-bit ISDN mu-law,
!:mime audio/x-dec-basic
>12 lelong 2 8-bit linear PCM [REF-PCM],
!:mime audio/x-dec-basic
>12 lelong 3 16-bit linear PCM,
!:mime audio/x-dec-basic
>12 lelong 4 24-bit linear PCM,
!:mime audio/x-dec-basic
>12 lelong 5 32-bit linear PCM,
!:mime audio/x-dec-basic
>12 lelong 6 32-bit IEEE floating point,
!:mime audio/x-dec-basic
>12 lelong 7 64-bit IEEE floating point,
!:mime audio/x-dec-basic
>12 lelong 23 8-bit ISDN mu-law compressed (CCITT G.721 ADPCM voice enc.),
!:mime audio/x-dec-basic
# Creative Labs AUDIO stuff
0 string MThd Standard MIDI data
!:mime audio/midi
0 string CTMF Creative Music (CMF) data
!:mime audio/x-unknown
0 string SBI SoundBlaster instrument data
!:mime audio/x-unknown
0 string Creative\ Voice\ File Creative Labs voice data
!:mime audio/x-unknown
# Real Audio (Magic .ra\0375)
0 belong 0x2e7261fd RealAudio sound file
!:mime audio/x-pn-realaudio
0 string .RMF\0\0\0 RealMedia file
!:mime application/vnd.rn-realmedia
# mime types according to http://www.geocities.com/nevilo/mod.htm:
# audio/it .it
# audio/x-zipped-it .itz
# audio/xm fasttracker modules
# audio/x-s3m screamtracker modules
# audio/s3m screamtracker modules
# audio/x-zipped-mod mdz
# audio/mod mod
# audio/x-mod All modules (mod, s3m, 669, mtm, med, xm, it, mdz, stm, itz, xmz, s3z)
#
# Taken from loader code from mikmod version 2.14
# by Steve McIntyre (stevem@chiark.greenend.org.uk)
# <doj@cubic.org> added title printing on 2003-06-24
0 string MAS_UTrack_V00
>14 string >/0 ultratracker V1.%.1s module sound data
!:mime audio/x-mod
#audio/x-tracker-module
0 string Extended\ Module: Fasttracker II module sound data
!:mime audio/x-mod
#audio/x-tracker-module
21 string/c =!SCREAM! Screamtracker 2 module sound data
!:mime audio/x-mod
#audio/x-screamtracker-module
21 string BMOD2STM Screamtracker 2 module sound data
!:mime audio/x-mod
#audio/x-screamtracker-module
1080 string M.K. 4-channel Protracker module sound data
!:mime audio/x-mod
#audio/x-protracker-module
1080 string M!K! 4-channel Protracker module sound data
!:mime audio/x-mod
#audio/x-protracker-module
1080 string FLT4 4-channel Startracker module sound data
!:mime audio/x-mod
#audio/x-startracker-module
1080 string FLT8 8-channel Startracker module sound data
!:mime audio/x-mod
#audio/x-startracker-module
1080 string 4CHN 4-channel Fasttracker module sound data
!:mime audio/x-mod
#audio/x-fasttracker-module
1080 string 6CHN 6-channel Fasttracker module sound data
!:mime audio/x-mod
#audio/x-fasttracker-module
1080 string 8CHN 8-channel Fasttracker module sound data
!:mime audio/x-mod
#audio/x-fasttracker-module
1080 string CD81 8-channel Octalyser module sound data
!:mime audio/x-mod
#audio/x-octalysertracker-module
1080 string OKTA 8-channel Octalyzer module sound data
!:mime audio/x-mod
#audio/x-octalysertracker-module
# Not good enough.
#1082 string CH
#>1080 string >/0 %.2s-channel Fasttracker "oktalyzer" module sound data
1080 string 16CN 16-channel Taketracker module sound data
!:mime audio/x-mod
#audio/x-taketracker-module
1080 string 32CN 32-channel Taketracker module sound data
!:mime audio/x-mod
#audio/x-taketracker-module
# Impulse tracker module (audio/x-it)
0 string IMPM Impulse Tracker module sound data -
!:mime audio/x-mod
# Free lossless audio codec <http://flac.sourceforge.net>
# From: Przemyslaw Augustyniak <silvathraec@rpg.pl>
0 string fLaC FLAC audio bitstream data
!:mime audio/x-flac
# Monkey's Audio compressed audio format (.ape)
# From danny.milo@gmx.net (Danny Milosavljevic)
# New version from Abel Cheung <abel (@) oaka.org>
0 string MAC\040 Monkey's Audio compressed format
!:mime audio/x-ape
# musepak support From: "Jiri Pejchal" <jiri.pejchal@gmail.com>
0 string MP+ Musepack audio
!:mime audio/x-musepack

47
magic/c-lang Normal file
View file

@ -0,0 +1,47 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File: c-lang,v 1.16 2011/12/09 08:02:16 rrt Exp $
# c-lang: file(1) magic for C and related languages programs
#
# BCPL
0 search/8192 "libhdr" BCPL source text
!:mime text/x-bcpl
0 search/8192 "LIBHDR" BCPL source text
!:mime text/x-bcpl
# C
0 regex \^#include C source text
!:mime text/x-c
0 regex \^char C source text
!:mime text/x-c
0 regex \^double C source text
!:mime text/x-c
0 regex \^extern C source text
!:mime text/x-c
0 regex \^float C source text
!:mime text/x-c
0 regex \^struct C source text
!:mime text/x-c
0 regex \^union C source text
!:mime text/x-c
0 search/8192 main( C source text
!:mime text/x-c
# C++
# The strength of these rules is increased so they beat the C rules above
0 regex \^template C++ source text
!:strength + 5
!:mime text/x-c++
0 regex \^virtual C++ source text
!:strength + 5
!:mime text/x-c++
0 regex \^class C++ source text
!:strength + 5
!:mime text/x-c++
0 regex \^public: C++ source text
!:strength + 5
!:mime text/x-c++
0 regex \^private: C++ source text
!:strength + 5
!:mime text/x-c++

31
magic/cafebabe Normal file
View file

@ -0,0 +1,31 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File: cafebabe,v 1.13 2013/02/26 21:04:38 christos Exp $
# Cafe Babes unite!
#
# Since Java bytecode and Mach-O universal binaries have the same magic number,
# the test must be performed in the same "magic" sequence to get both right.
# The long at offset 4 in a Mach-O universal binary tells the number of
# architectures; the short at offset 4 in a Java bytecode file is the JVM minor
# version and the short at offset 6 is the JVM major version. Since there are
# only 18 labeled Mach-O architectures at present, and the first released
# Java class format was version 43.0, we can safely choose any number
# between 18 and 39 to test the number of architectures against
# (and use as a hack). Let's not use 18, because the Mach-O people
# might add another one or two as time goes by...
#
### JAVA START ###
0 belong 0xcafebabe
!:mime application/x-java-applet
# JAR compressed with pack200 (magic 0xcafed00d). The bytes at offsets 5 and 4
# are printed as "version <byte5>.<byte4>"; the MIME type is attached to the
# last (always-matching) continuation line.
0 belong 0xcafed00d JAR compressed with pack200,
>5 byte x version %d.
>4 byte x \b%d
!:mime application/x-java-pack200
### JAVA END ###

82
magic/commands Normal file
View file

@ -0,0 +1,82 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File: commands,v 1.44 2013/02/05 15:20:47 christos Exp $
# commands: file(1) magic for various shells and interpreters
#
#0 string/w : shell archive or script for antique kernel text
0 string/wt #!\ /bin/sh POSIX shell script text executable
!:mime text/x-shellscript
0 string/wt #!\ /bin/csh C shell script text executable
!:mime text/x-shellscript
# korn shell magic, sent by George Wu, gwu@clyde.att.com
0 string/wt #!\ /bin/ksh Korn shell script text executable
!:mime text/x-shellscript
0 string/wt #!\ /bin/tcsh Tenex C shell script text executable
!:mime text/x-shellscript
0 string/wt #!\ /usr/bin/tcsh Tenex C shell script text executable
!:mime text/x-shellscript
0 string/wt #!\ /usr/local/tcsh Tenex C shell script text executable
!:mime text/x-shellscript
0 string/wt #!\ /usr/local/bin/tcsh Tenex C shell script text executable
!:mime text/x-shellscript
#
# zsh/ash/ae/nawk/gawk magic from cameron@cs.unsw.oz.au (Cameron Simpson)
0 string/wt #!\ /bin/zsh Paul Falstad's zsh script text executable
!:mime text/x-shellscript
0 string/wt #!\ /usr/bin/zsh Paul Falstad's zsh script text executable
!:mime text/x-shellscript
0 string/wt #!\ /usr/local/bin/zsh Paul Falstad's zsh script text executable
!:mime text/x-shellscript
0 string/wt #!\ /usr/local/bin/ash Neil Brown's ash script text executable
!:mime text/x-shellscript
0 string/wt #!\ /usr/local/bin/ae Neil Brown's ae script text executable
!:mime text/x-shellscript
0 string/wt #!\ /bin/nawk new awk script text executable
!:mime text/x-nawk
0 string/wt #!\ /usr/bin/nawk new awk script text executable
!:mime text/x-nawk
0 string/wt #!\ /usr/local/bin/nawk new awk script text executable
!:mime text/x-nawk
0 string/wt #!\ /bin/gawk GNU awk script text executable
!:mime text/x-gawk
0 string/wt #!\ /usr/bin/gawk GNU awk script text executable
!:mime text/x-gawk
0 string/wt #!\ /usr/local/bin/gawk GNU awk script text executable
!:mime text/x-gawk
#
0 string/wt #!\ /bin/awk awk script text executable
!:mime text/x-awk
0 string/wt #!\ /usr/bin/awk awk script text executable
!:mime text/x-awk
# bash shell magic, from Peter Tobias (tobias@server.et-inf.fho-emden.de)
0 string/wt #!\ /bin/bash Bourne-Again shell script text executable
!:mime text/x-shellscript
0 string/wt #!\ /usr/bin/bash Bourne-Again shell script text executable
!:mime text/x-shellscript
0 string/wt #!\ /usr/local/bash Bourne-Again shell script text executable
!:mime text/x-shellscript
0 string/wt #!\ /usr/local/bin/bash Bourne-Again shell script text executable
!:mime text/x-shellscript
# PHP scripts
# Ulf Harnhammar <ulfh@update.uu.se>
0 search/1/c =<?php PHP script text
!:strength + 10
!:mime text/x-php
0 search/1 =<?\n PHP script text
!:mime text/x-php
0 search/1 =<?\r PHP script text
!:mime text/x-php
0 search/1/w #!\ /usr/local/bin/php PHP script text executable
!:strength + 10
!:mime text/x-php
0 search/1/w #!\ /usr/bin/php PHP script text executable
!:strength + 10
!:mime text/x-php
# Smarty compiled template, http://www.smarty.net/
# Elan Ruusamae <glen@delfi.ee>
0 string =<?php\ /*\ Smarty\ version Smarty compiled template
>24 regex [0-9.]+ \b, version %s
!:mime text/x-php

77
magic/compress Normal file
View file

@ -0,0 +1,77 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File: compress,v 1.48 2011/12/07 18:39:43 christos Exp $
# compress: file(1) magic for pure-compression formats (no archives)
#
# compress, gzip, pack, compact, huf, squeeze, crunch, freeze, yabba, etc.
#
# Formats for various forms of compressed data
# Formats for "compress" proper have been moved into "compress.c",
# because it tries to uncompress it to figure out what's inside.
# standard unix compress
0 string \037\235 compress'd data
!:mime application/x-compress
!:apple LZIVZIVU
# gzip (GNU zip, not to be confused with Info-ZIP or PKWARE zip archiver)
# Edited by Chris Chittleborough <cchittleborough@yahoo.com.au>, March 2002
# * Original filename is only at offset 10 if "extra field" absent
# * Produce shorter output - notably, only report compression methods
# other than 8 ("deflate", the only method defined in RFC 1952).
0 string \037\213 gzip compressed data
!:mime application/x-gzip
# packed data, Huffman (minimum redundancy) codes on a byte-by-byte basis
0 string \037\036 packed data
!:mime application/octet-stream
#
# This magic number is byte-order-independent.
0 short 0x1f1f old packed data
!:mime application/octet-stream
# XXX - why *two* entries for "compacted data", one of which is
# byte-order independent, and one of which is byte-order dependent?
#
0 short 0x1fff compacted data
!:mime application/octet-stream
# This string is valid for SunOS (BE) and a matching "short" is listed
# in the Ultrix (LE) magic file.
0 string \377\037 compacted data
!:mime application/octet-stream
0 short 0145405 huf output
!:mime application/octet-stream
# bzip2
0 string BZh bzip2 compressed data
!:mime application/x-bzip2
# lzip
0 string LZIP lzip compressed data
!:mime application/x-lzip
# 7-zip archiver, from Thomas Klausner (wiz@danbala.tuwien.ac.at)
# http://www.7-zip.org or DOC/7zFormat.txt
#
0 string 7z\274\257\047\034 7-zip archive data,
>6 byte x version %d
>7 byte x \b.%d
!:mime application/x-7z-compressed
# Type: LZMA
# The MIME type is attached to the line that identifies the format, so that
# both the streamed and the non-streamed variants below are reported as
# application/x-lzma (a !:mime directive applies to the line preceding it).
0 lelong&0xffffff =0x5d
>12 leshort =0xff LZMA compressed data,
!:mime application/x-lzma
>>5 lequad =0xffffffffffffffff streamed
>>5 lequad !0xffffffffffffffff non-streamed, size %lld
# http://tukaani.org/xz/xz-file-format.txt
0 ustring \xFD7zXZ\x00 XZ compressed data
!:mime application/x-xz
# https://github.com/ckolivas/lrzip/blob/master/doc/magic.header.txt
0 string LRZI LRZIP compressed data
>4 byte x - version %d
>5 byte x \b.%d
!:mime application/x-lrzip

47
magic/database Normal file
View file

@ -0,0 +1,47 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File: database,v 1.32 2013/02/06 14:18:52 christos Exp $
# database: file(1) magic for various databases
#
# extracted from header/code files by Graeme Wilford (eep2gw@ee.surrey.ac.uk)
#
#
# GDBM magic numbers
# Will be maintained as part of the GDBM distribution in the future.
# <downsj@teeny.org>
0 belong 0x13579ace GNU dbm 1.x or ndbm database, big endian
!:mime application/x-gdbm
0 lelong 0x13579ace GNU dbm 1.x or ndbm database, little endian
!:mime application/x-gdbm
0 string GDBM GNU dbm 2.x database
!:mime application/x-gdbm
#
# Berkeley DB
#
# Ian Darwin's file /etc/magic files: big/little-endian version.
#
# Hash 1.85/1.86 databases store metadata in network byte order.
# Btree 1.85/1.86 databases store the metadata in host byte order.
# Hash and Btree 2.X and later databases store the metadata in host byte order.
0 long 0x00061561 Berkeley DB
!:mime application/x-dbm
# MS Access database
4 string Standard\ Jet\ DB Microsoft Access Database
!:mime application/x-msaccess
4 string Standard\ ACE\ DB Microsoft Access Database
!:mime application/x-msaccess
# Tokyo Cabinet magic data
# http://tokyocabinet.sourceforge.net/index.html
0 string ToKyO\ CaBiNeT\n Tokyo Cabinet
>14 string x \b (%s)
>32 byte 0 \b, Hash
!:mime application/x-tokyocabinet-hash
>32 byte 1 \b, B+ tree
!:mime application/x-tokyocabinet-btree
>32 byte 2 \b, Fixed-length
!:mime application/x-tokyocabinet-fixed
>32 byte 3 \b, Table
!:mime application/x-tokyocabinet-table

25
magic/diff Normal file
View file

@ -0,0 +1,25 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File: diff,v 1.13 2012/06/16 14:43:36 christos Exp $
# diff: file(1) magic for diff(1) output
#
0 search/1 diff\ diff output text
!:mime text/x-diff
0 search/1 ***\ diff output text
!:mime text/x-diff
0 search/1 Only\ in\ diff output text
!:mime text/x-diff
0 search/1 Common\ subdirectories:\ diff output text
!:mime text/x-diff
0 search/1 Index: RCS/CVS diff output text
!:mime text/x-diff
# unified diff
0 search/4096 ---\
>&0 search/1024 \n
>>&0 search/1 +++\
>>>&0 search/1024 \n
>>>>&0 search/1 @@ unified diff output text
!:mime text/x-diff
!:strength + 90

43
magic/elf Normal file
View file

@ -0,0 +1,43 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# elf: file(1) magic for ELF executables
#
# We have to check the byte order flag to see what byte order all the
# other stuff in the header is in.
#
# What're the correct byte orders for the nCUBE and the Fujitsu VPP500?
#
# Created by: unknown
# Modified by (1): Daniel Quinlan <quinlan@yggdrasil.com>
# Modified by (2): Peter Tobias <tobias@server.et-inf.fho-emden.de> (core support)
# Modified by (3): Christian 'Dr. Disk' Hechelmann <drdisk@ds9.au.s.shuttle.de> (fix of core support)
# Modified by (4): <gerardo.cacciari@gmail.com> (VMS Itanium)
# Modified by (5): Matthias Urlichs <smurf@debian.org> (Listing of many architectures)
0 string \177ELF ELF
>4 byte 0 invalid class
>4 byte 1 32-bit
>4 byte 2 64-bit
>5 byte 0 invalid byte order
>5 byte 1 LSB
>>16 leshort 0 no file type,
!:strength *2
!:mime application/octet-stream
>>16 leshort 1 relocatable,
!:mime application/x-object
>>16 leshort 2 executable,
!:mime application/x-executable
>>16 leshort 3 shared object,
!:mime application/x-sharedlib
>>16 leshort 4 core file
!:mime application/x-coredump
>5 byte 2 MSB
>>16 beshort 0 no file type,
!:mime application/octet-stream
>>16 beshort 1 relocatable,
!:mime application/x-object
>>16 beshort 2 executable,
!:mime application/x-executable
>>16 beshort 3 shared object,
!:mime application/x-sharedlib
>>16 beshort 4 core file,
!:mime application/x-coredump

34
magic/epoc Normal file
View file

@ -0,0 +1,34 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File: epoc,v 1.7 2009/09/19 16:28:09 christos Exp $
# EPOC : file(1) magic for EPOC documents [Psion Series 5/Osaris/Geofox 1]
# Stefan Praszalowicz <hpicollo@worldnet.fr> and Peter Breitenlohner <peb@mppmu.mpg.de>
# Useful information for improving this file can be found at:
# http://software.frodo.looijaard.name/psiconv/formats/Index.html
#------------------------------------------------------------------------------
0 lelong 0x10000037 Psion Series 5
>4 lelong 0x10000042 multi-bitmap image
!:mime image/x-epoc-mbm
>4 lelong 0x1000006D
>>8 lelong 0x1000007D Sketch image
!:mime image/x-epoc-sketch
>>8 lelong 0x1000007F Word file
!:mime application/x-epoc-word
>>8 lelong 0x10000085 OPL program (TextEd)
!:mime application/x-epoc-opl
>>8 lelong 0x10000088 Sheet file
!:mime application/x-epoc-sheet
>4 lelong 0x10000073 OPO module
!:mime application/x-epoc-opo
>4 lelong 0x10000074 OPL application
!:mime application/x-epoc-app
0 lelong 0x10000050 Psion Series 5
>4 lelong 0x1000006D database
>>8 lelong 0x10000084 Agenda file
!:mime application/x-epoc-agenda
>>8 lelong 0x10000086 Data file
!:mime application/x-epoc-data
>>8 lelong 0x10000CEA Jotter file
!:mime application/x-epoc-jotter

12
magic/filesystems Normal file
View file

@ -0,0 +1,12 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File: filesystems,v 1.76 2013/02/18 18:45:41 christos Exp $
# filesystems: file(1) magic for different filesystems
#
# CDROM Filesystems
# Modified for UDF by gerardo.cacciari@gmail.com
32769 string CD001 #
!:mime application/x-iso9660-image
37633 string CD001 ISO 9660 CD-ROM filesystem data (raw 2352 byte sectors)
!:mime application/x-iso9660-image

18
magic/flash Normal file
View file

@ -0,0 +1,18 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File: flash,v 1.8 2009/09/19 16:28:09 christos Exp $
# flash: file(1) magic for Macromedia Flash file format
#
# See
#
# http://www.macromedia.com/software/flash/open/
#
0 string FWS Macromedia Flash data,
>3 byte x version %d
!:mime application/x-shockwave-flash
0 string CWS Macromedia Flash data (compressed),
!:mime application/x-shockwave-flash
# From: Cal Peake <cp@absolutedigital.net>
0 string FLV Macromedia Flash Video
!:mime video/x-flv

32
magic/fonts Normal file
View file

@ -0,0 +1,32 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File: fonts,v 1.25 2013/02/06 14:18:52 christos Exp $
# fonts: file(1) magic for font data
#
# X11 font files in SNF (Server Natural Format) format
# updated by Joerg Jenderek at Feb 2013
# http://computer-programming-forum.com/51-perl/8f22fb96d2e34bab.htm
0 belong 00000004 X11 SNF font data, MSB first
#>104 belong 00000004 X11 SNF font data, MSB first
!:mime application/x-font-sfn
# GRR: line below too general as it catches also Xbase index file t3-CHAR.NDX
0 lelong 00000004
>104 lelong 00000004 X11 SNF font data, LSB first
!:mime application/x-font-sfn
# True Type fonts
0 string \000\001\000\000\000 TrueType font data
!:mime application/x-font-ttf
# Opentype font data from Avi Bercovich
0 string OTTO OpenType font data
!:mime application/vnd.ms-opentype
# Gurkan Sengun <gurkan@linuks.mine.nu>, www.linuks.mine.nu
0 string SplineFontDB: Spline Font Database
!:mime application/vnd.font-fontforge-sfd
# EOT
34 string LP Embedded OpenType (EOT)
!:mime application/vnd.ms-fontobject

7
magic/fortran Normal file
View file

@ -0,0 +1,7 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File: fortran,v 1.6 2009/09/19 16:28:09 christos Exp $
# FORTRAN source
0 regex/100 \^[Cc][\ \t] FORTRAN program
!:mime text/x-fortran
!:strength - 5

31
magic/frame Normal file
View file

@ -0,0 +1,31 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File$
# frame: file(1) magic for FrameMaker files
#
# This stuff came on a FrameMaker demo tape, most of which is
# copyright, but this file is "published" as witness the following:
#
# Note that this is the Framemaker Maker Interchange Format, not the
# Normal format which would be application/vnd.framemaker.
#
0 string \<MakerFile FrameMaker document
!:mime application/x-mif
0 string \<MIFFile FrameMaker MIF (ASCII) file
!:mime application/x-mif
0 search/1 \<MakerDictionary FrameMaker Dictionary text
!:mime application/x-mif
0 string \<MakerScreenFont FrameMaker Font file
!:mime application/x-mif
0 string \<MML FrameMaker MML file
!:mime application/x-mif
0 string \<BookFile FrameMaker Book file
!:mime application/x-mif
# XXX - this book entry should be verified, if you find one, uncomment this
#0 string \<Book\ FrameMaker Book (ASCII) file
#!:mime application/x-mif
#>6 string 3.0 (3.0)
#>6 string 2.0 (2.0)
#>6 string 1.0 (1.0)
# Blanks inside the test string must be escaped: an unescaped blank ends the
# pattern, which would leave just "<Maker" as the string to match and push the
# remaining words into the description.
0 string \<Maker\ Intermediate\ Print\ File FrameMaker IPL file
!:mime application/x-mif

13
magic/gimp Normal file
View file

@ -0,0 +1,13 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File: gimp,v 1.6 2009/09/19 16:28:09 christos Exp $
# GIMP Gradient: file(1) magic for the GIMP's gradient data files
# by Federico Mena <federico@nuclecu.unam.mx>
#------------------------------------------------------------------------------
# XCF: file(1) magic for the XCF image format used in the GIMP developed
# by Spencer Kimball and Peter Mattis
# ('Bucky' LaDieu, nega@vt.edu)
0 string gimp\ xcf GIMP XCF image data,
!:mime image/x-xcf

23
magic/gnu Normal file
View file

@ -0,0 +1,23 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File: gnu,v 1.13 2012/01/03 17:16:54 christos Exp $
# gnu: file(1) magic for various GNU tools
#
# GNU nlsutils message catalog file format
#
# GNU message catalog (.mo and .gmo files)
# GnuPG
# The format is very similar to pgp
# Note: magic.mime had 0x8501 for the next line instead of 0x8502
0 beshort 0x8502 GPG encrypted data
!:mime text/PGP # encoding: data
# This magic is not particularly good, as the keyrings don't have true
# magic. Nevertheless, it covers many keyrings.
0 beshort 0x9901 GPG key public ring
!:mime application/x-gnupg-keyring
# gettext message catalogue
0 regex \^msgid\ GNU gettext message catalogue text
!:mime text/x-po

8
magic/gnumeric Normal file
View file

@ -0,0 +1,8 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File$
# gnumeric: file(1) magic for Gnumeric spreadsheet
# This entry is only semi-helpful, as Gnumeric compresses its files, so
# they will ordinarily be reported as "compressed", but at least -z helps
39 string =<gmr:Workbook Gnumeric spreadsheet
!:mime application/x-gnumeric

51
magic/icc Normal file
View file

@ -0,0 +1,51 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File$
# icc: file(1) magic for International Color Consortium file formats
#
# Color profiles as per the ICC's "Image technology colour management -
# Architecture, profile format, and data structure" specification.
# See
#
# http://www.color.org/specification/ICC1v43_2010-12.pdf
#
# for Specification ICC.1:2010 (Profile version 4.3.0.0).
#
# Bytes 36 to 39 contain a generic profile file signature of "acsp";
# bytes 40 to 43 "may be used to identify the primary platform/operating
# system framework for which the profile was created".
#
# There are other fields that might be worth dumping as well.
#
# This appears to be what's used for Apple ColorSync profiles.
# Instead of adding that, Apple just changed the generic "acsp" entry
# to be for "ColorSync ICC Color Profile" rather than "Kodak Color
# Management System, ICC Profile".
# Yes, it's "APPL", not "AAPL"; see the spec.
36 string acspAPPL ColorSync ICC Profile
!:mime application/vnd.iccprofile
# Microsoft ICM color profile
36 string acspMSFT Microsoft ICM Color Profile
!:mime application/vnd.iccprofile
# Yes, that's a blank after "SGI".
36 string acspSGI\ SGI ICC Profile
!:mime application/vnd.iccprofile
# XXX - is this what's used for the Sun KCMS or not? The standard file
# uses just "acsp" for that, but Apple's file uses it for "ColorSync",
# and there *is* an identified "primary platform" value of SUNW.
36 string acspSUNW Sun KCMS ICC Profile
!:mime application/vnd.iccprofile
# Any other profile.
# XXX - should we use "acsp\0\0\0\0" for "no primary platform" profiles,
# and use "acsp" for everything else and dump the "primary platform"
# string in those cases?
36 string acsp ICC Profile
!:mime application/vnd.iccprofile

21
magic/iff Normal file
View file

@ -0,0 +1,21 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File: iff,v 1.12 2009/09/19 16:28:09 christos Exp $
# iff: file(1) magic for Interchange File Format (see also "audio" & "images")
#
# Daniel Quinlan (quinlan@yggdrasil.com) -- IFF was designed by Electronic
# Arts for file interchange. It has also been used by Apple, SGI, and
# especially Commodore-Amiga.
#
# IFF files begin with an 8 byte FORM header, followed by a 4 character
# FORM type, which is followed by the first chunk in the FORM.
0 string FORM IFF data
#>4 belong x \b, FORM is %d bytes long
# audio formats
>8 string AIFF \b, AIFF audio
!:mime audio/x-aiff
>8 string AIFC \b, AIFF-C compressed audio
!:mime audio/x-aiff
>8 string 8SVX \b, 8SVX 8-bit sampled sound voice
!:mime audio/x-aiff

255
magic/images Normal file
View file

@ -0,0 +1,255 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File: images,v 1.80 2013/02/06 14:18:52 christos Exp $
# images: file(1) magic for image formats (see also "iff", and "c-lang" for
# XPM bitmaps)
#
# originally from jef@helios.ee.lbl.gov (Jef Poskanzer),
# additions by janl@ifi.uio.no as well as others. Jan also suggested
# merging several one- and two-line files into here.
#
# little magic: PCX (first byte is 0x0a)
# PBMPLUS images
# The next byte following the magic is always whitespace.
# strength is changed to try these patterns before "x86 boot sector"
0 search/1 P1
>3 regex =[0-9]*\ [0-9]* Netpbm PBM image text
>3 regex =[0-9]+\ \b, size = %sx
>>3 regex =\ [0-9]+ \b%s
!:strength + 45
!:mime image/x-portable-bitmap
0 search/1 P2
>3 regex =[0-9]*\ [0-9]* Netpbm PGM image text
>3 regex =[0-9]+\ \b, size = %sx
>>3 regex =\ [0-9]+ \b%s
!:strength + 45
!:mime image/x-portable-greymap
# Netpbm PPM (plain text). No description on the top-level line: as with the
# other Netpbm entries, the format name is printed by the first continuation
# line, so repeating it here would duplicate the text in the output.
0 search/1 P3
>3 regex =[0-9]*\ [0-9]* Netpbm PPM image text
>3 regex =[0-9]+\ \b, size = %sx
>>3 regex =\ [0-9]+ \b%s
!:strength + 45
!:mime image/x-portable-pixmap
0 string P4
>3 regex =[0-9]*\ [0-9]* Netpbm PBM "rawbits" image data
>3 regex =[0-9]+\ \b, size = %sx
>>3 regex =\ [0-9]+ \b%s
!:strength + 45
!:mime image/x-portable-bitmap
0 string P5
>3 regex =[0-9]*\ [0-9]* Netpbm PGM "rawbits" image data
>3 regex =[0-9]+\ \b, size = %sx
>>3 regex =\ [0-9]+ \b%s
!:strength + 45
!:mime image/x-portable-greymap
0 string P6
>3 regex =[0-9]*\ [0-9]* Netpbm PPM "rawbits" image data
>3 regex =[0-9]+\ \b, size = %sx
>>3 regex =\ [0-9]+ \b%s
!:strength + 45
!:mime image/x-portable-pixmap
0 string P7 Netpbm PAM image file
!:mime image/x-portable-pixmap
# NIFF (Navy Interchange File Format, a modification of TIFF) images
# [GRR: this *must* go before TIFF]
0 string IIN1 NIFF image data
!:mime image/x-niff
# Canon RAW version 1 (CRW) files are a type of Canon Image File Format
# (CIFF) file. These are apparently all little-endian.
# From: Adam Buchbinder <adam.buchbinder@gmail.com>
# URL: http://www.sno.phy.queensu.ca/~phil/exiftool/canon_raw.html
0 string II\x1a\0\0\0HEAPCCDR Canon CIFF raw image data
!:mime image/x-canon-crw
# Canon RAW version 2 (CR2) files are a kind of TIFF with an extra magic
# number. Put this above the TIFF test to make sure we detect them.
# These are apparently all little-endian.
# From: Adam Buchbinder <adam.buchbinder@gmail.com>
# URL: http://libopenraw.freedesktop.org/wiki/Canon_CR2
0 string II\x2a\0\x10\0\0\0CR Canon CR2 raw image data
!:mime image/x-canon-cr2
# Tag Image File Format, from Daniel Quinlan (quinlan@yggdrasil.com)
# The second word of TIFF files is the TIFF version number, 42, which has
# never changed. The TIFF specification recommends testing for it.
0 string MM\x00\x2a TIFF image data, big-endian
!:mime image/tiff
0 string II\x2a\x00 TIFF image data, little-endian
!:mime image/tiff
0 string MM\x00\x2b Big TIFF image data, big-endian
!:mime image/tiff
0 string II\x2b\x00 Big TIFF image data, little-endian
!:mime image/tiff
# PNG [Portable Network Graphics, or "PNG's Not GIF"] images
# (Greg Roelofs, newt@uchicago.edu)
# (Albert Cahalan, acahalan@cs.uml.edu)
#
# 137 P N G \r \n ^Z \n [4-byte length] H E A D [HEAD data] [HEAD crc] ...
#
0 string \x89PNG\x0d\x0a\x1a\x0a PNG image data
!:mime image/png
# possible GIF replacements; none yet released!
# (Greg Roelofs, newt@uchicago.edu)
#
# GRR 950115: this was mine ("Zip GIF"):
0 string GIF94z ZIF image (GIF+deflate alpha)
!:mime image/x-unknown
#
# GRR 950115: this is Jeremy Wohl's Free Graphics Format (better):
#
0 string FGF95a FGF image (GIF+deflate beta)
!:mime image/x-unknown
#
# GRR 950115: this is Thomas Boutell's Portable Bitmap Format proposal
# (best; not yet implemented):
#
0 string PBF PBF image (deflate compression)
!:mime image/x-unknown
# GIF
0 string GIF8 GIF image data
!:mime image/gif
!:apple 8BIMGIFf
# From: Joerg Jenderek <joerg.jen.der.ek@gmx.net>
# most files with the extension .EPA and some with .BMP
0 string \x11\x06 Award BIOS Logo, 136 x 84
!:mime image/x-award-bioslogo
0 string \x11\x09 Award BIOS Logo, 136 x 126
!:mime image/x-award-bioslogo
#0 string \x07\x1f BIOS Logo corrupted?
# http://www.blackfiveservices.co.uk/awbmtools.shtml
# http://biosgfx.narod.ru/v3/
# http://biosgfx.narod.ru/abr-2/
0 string AWBM
>4 leshort <1981 Award BIOS bitmap
!:mime image/x-award-bmp
# PC bitmaps (OS/2, Windows BMP files) (Greg Roelofs, newt@uchicago.edu)
0 string BM
>14 leshort 12 PC bitmap, OS/2 1.x format
!:mime image/x-ms-bmp
>14 leshort 64 PC bitmap, OS/2 2.x format
!:mime image/x-ms-bmp
>14 leshort 40 PC bitmap, Windows 3.x format
!:mime image/x-ms-bmp
>14 leshort 128 PC bitmap, Windows NT/2000 format
!:mime image/x-ms-bmp
# XPM icons (Greg Roelofs, newt@uchicago.edu)
0 search/1 /*\ XPM\ */ X pixmap image text
!:mime image/x-xpmi
# DICOM medical imaging data
128 string DICM DICOM medical imaging data
!:mime application/dicom
# XWD - X Window Dump file.
# As described in /usr/X11R6/include/X11/XWDFile.h
# used by the xwd program.
# Bradford Castalia, idaeim, 1/01
# updated by Adam Buchbinder, 2/09
# The following assumes version 7 of the format; the first long is the length
# of the header, which is at least 25 4-byte longs, and the one at offset 8
# is a constant which is always either 1 or 2. Offset 12 is the pixmap depth,
# which is a maximum of 32.
0 belong >100
>8 belong <3
>>12 belong <33
>>>4 belong 7 XWD X Window Dump image data
!:mime image/x-xwindowdump
# PCX image files
# From: Dan Fandrich <dan@coneharvesters.com>
# updated by Joerg Jenderek at Feb 2013 by http://de.wikipedia.org/wiki/PCX
# http://web.archive.org/web/20100206055706/http://www.qzx.com/pc-gpe/pcx.txt
# GRR: original test was still too general as it catches xbase examples T5.DBT,T6.DBT with 0xa000000
# test for bytes 0x0a,version byte (0,2,3,4,5),compression byte flag(0,1), bit depth (>0) of PCX or T5.DBT,T6.DBT
0 ubelong&0xffF8fe00 0x0a000000
# for PCX bit depth > 0
>3 ubyte >0
# test for valid versions
>>1 ubyte <6
>>>1 ubyte !1 PCX
!:mime image/x-pcx
# Adobe Photoshop
# From: Asbjoern Sloth Toennesen <asbjorn@lila.io>
0 string 8BPS Adobe Photoshop Image
!:mime image/vnd.adobe.photoshop
# Summary: DjVu image / document
# Extension: .djvu
# Reference: http://djvu.org/docs/DjVu3Spec.djvu
# Submitted by: Stephane Loeuillet <stephane.loeuillet@tiscali.fr>
# Modified by (1): Abel Cheung <abelcheung@gmail.com>
0 string AT&TFORM
>12 string DJVM DjVu multiple page document
!:mime image/vnd.djvu
>12 string DJVU DjVu image or single page document
!:mime image/vnd.djvu
>12 string DJVI DjVu shared document
!:mime image/vnd.djvu
>12 string THUM DjVu page thumbnails
!:mime image/vnd.djvu
# Originally by Marc Espie
# Modified by Robert Minsk <robertminsk at yahoo.com>
# http://www.openexr.com/openexrfilelayout.pdf
0 lelong 20000630 OpenEXR image data,
!:mime image/x-exr
# SMPTE Digital Picture Exchange Format, SMPTE DPX
#
# ANSI/SMPTE 268M-1994, SMPTE Standard for File Format for Digital
# Moving-Picture Exchange (DPX), v1.0, 18 February 1994
# Robert Minsk <robertminsk at yahoo.com>
0 string SDPX DPX image data, big-endian,
!:mime image/x-dpx
#-----------------------------------------------------------------------
# Hierarchical Data Format, used to facilitate scientific data exchange
# specifications at http://hdf.ncsa.uiuc.edu/
0 belong 0x0e031301 Hierarchical Data Format (version 4) data
!:mime application/x-hdf
0 string \211HDF\r\n\032\n Hierarchical Data Format (version 5) data
!:mime application/x-hdf
# http://www.cartesianinc.com/Tech/
0 string CPC\262 Cartesian Perceptual Compression image
!:mime image/x-cpi
# Polar Monitor Bitmap (.pmb) used as logo for Polar Electro watches
# From: Markus Heidelberg <markus.heidelberg at web.de>
0 string/t [BitmapInfo2] Polar Monitor Bitmap text
!:mime image/x-polar-monitor-bitmap
# Type: Olympus ORF raw images.
# URL: http://libopenraw.freedesktop.org/wiki/Olympus_ORF
# From: Adam Buchbinder <adam.buchbinder@gmail.com>
0 string MMOR Olympus ORF raw image data, big-endian
!:mime image/x-olympus-orf
0 string IIRO Olympus ORF raw image data, little-endian
!:mime image/x-olympus-orf
0 string IIRS Olympus ORF raw image data, little-endian
!:mime image/x-olympus-orf
# Type: Foveon X3F
# URL: http://www.photofo.com/downloads/x3f-raw-format.pdf
# From: Adam Buchbinder <adam.buchbinder@gmail.com>
# Note that the MIME type isn't defined anywhere that I can find; if
# there's a canonical type for this format, it should replace this one.
0 string FOVb Foveon X3F raw image data
!:mime image/x-x3f
# Paint.NET file
# From Adam Buchbinder <adam.buchbinder@gmail.com>
0 string PDN3 Paint.NET image data
!:mime image/x-paintnet

16
magic/java Normal file
View file

@ -0,0 +1,16 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------
# $File: java,v 1.13 2011/12/08 12:12:46 rrt Exp $
# Java ByteCode and Mach-O binaries (e.g., Mac OS X) use the
# same magic number, 0xcafebabe, so they are both handled
# in the entry called "cafebabe".
#------------------------------------------------------------
0 belong 0xfeedfeed Java KeyStore
!:mime application/x-java-keystore
0 belong 0xcececece Java JCE KeyStore
!:mime application/x-java-jce-keystore
# Java source
0 regex ^import.*;$ Java source
!:mime text/x-java

17
magic/javascript Normal file
View file

@ -0,0 +1,17 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File: $
# javascript: magic for javascript and node.js scripts.
#
0 search/1/w #!/bin/node Node.js script text executable
!:mime application/javascript
0 search/1/w #!/usr/bin/node Node.js script text executable
!:mime application/javascript
0 search/1/w #!/bin/nodejs Node.js script text executable
!:mime application/javascript
0 search/1/w #!/usr/bin/nodejs Node.js script text executable
!:mime application/javascript
0 search/1 #!/usr/bin/env\ node Node.js script text executable
!:mime application/javascript
0 search/1 #!/usr/bin/env\ nodejs Node.js script text executable
!:mime application/javascript

31
magic/jpeg Normal file
View file

@ -0,0 +1,31 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File: jpeg,v 1.18 2012/08/01 12:12:36 christos Exp $
# JPEG images
# SunOS 5.5.1 had
#
# 0 string \377\330\377\340 JPEG file
# 0 string \377\330\377\356 JPG file
#
# both of which turn into "JPEG image data" here.
#
0 beshort 0xffd8 JPEG image data
!:mime image/jpeg
!:apple 8BIMJPEG
!:strength +2
# From: David Santinoli <david@santinoli.com>
0 string \x00\x00\x00\x0C\x6A\x50\x20\x20\x0D\x0A\x87\x0A JPEG 2000
# From: Johan van der Knijff <johan.vanderknijff@kb.nl>
# Added sub-entries for JP2, JPX, JPM and MJ2 formats; added mimetypes
# https://github.com/bitsgalore/jp2kMagic
#
# Now read value of 'Brand' field, which yields a few possibilities:
>20 string \x6a\x70\x32\x20 Part 1 (JP2)
!:mime image/jp2
>20 string \x6a\x70\x78\x20 Part 2 (JPX)
!:mime image/jpx
>20 string \x6a\x70\x6d\x20 Part 6 (JPM)
!:mime image/jpm
>20 string \x6d\x6a\x70\x32 Part 3 (MJ2)
!:mime video/mj2

11
magic/kde Normal file
View file

@ -0,0 +1,11 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File: kde,v 1.4 2009/09/19 16:28:10 christos Exp $
# kde: file(1) magic for KDE
0 string/t [KDE\ Desktop\ Entry] KDE desktop entry
!:mime application/x-kdelnk
0 string/t #\ KDE\ Config\ File KDE config file
!:mime application/x-kdelnk
0 string/t #\ xmcd xmcd database file for kscd
!:mime text/x-xmcd

30
magic/kml Normal file
View file

@ -0,0 +1,30 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File: kml,v 1.2 2009/09/19 16:28:10 christos Exp $
# Type: Google KML, formerly Keyhole Markup Language
# Future development of this format has been handed
# over to the Open Geospatial Consortium.
# http://www.opengeospatial.org/standards/kml/
# From: Asbjoern Sloth Toennesen <asbjorn@lila.io>
0 string/t \<?xml
>20 search/400 \ xmlns=
>>&0 regex ['"]http://earth.google.com/kml Google KML document
!:mime application/vnd.google-earth.kml+xml
#------------------------------------------------------------------------------
# Type: OpenGIS KML, formerly Keyhole Markup Language
# This standard is maintained by the
# Open Geospatial Consortium.
# http://www.opengeospatial.org/standards/kml/
# From: Asbjoern Sloth Toennesen <asbjorn@lila.io>
>>&0 regex ['"]http://www.opengis.net/kml OpenGIS KML document
!:mime application/vnd.google-earth.kml+xml
#------------------------------------------------------------------------------
# Type: Google KML Archive (ZIP based)
# http://code.google.com/apis/kml/documentation/kml_tut.html
# From: Asbjoern Sloth Toennesen <asbjorn@lila.io>
0 string PK\003\004
>4 byte 0x14
>>30 string doc.kml Compressed Google KML Document, including resources.
!:mime application/vnd.google-earth.kmz

22
magic/linux Normal file
View file

@ -0,0 +1,22 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File: linux,v 1.46 2013/01/06 21:26:48 christos Exp $
# linux: file(1) magic for Linux files
#
# Values for Linux/i386 binaries, from Daniel Quinlan <quinlan@yggdrasil.com>
# The following basic Linux magic is useful for reference, but using
# "long" magic is a better practice in order to avoid collisions.
#
# 2 leshort 100 Linux/i386
# >0 leshort 0407 impure executable (OMAGIC)
# >0 leshort 0410 pure executable (NMAGIC)
# >0 leshort 0413 demand-paged executable (ZMAGIC)
# >0 leshort 0314 demand-paged executable (QMAGIC)
#
# SYSLINUX boot logo files (from 'ppmtolss16' sources)
# http://www.syslinux.org/wiki/index.php/SYSLINUX#Display_graphic_from_filename:
# file extension .lss .16
0 lelong =0x1413f33d SYSLINUX' LSS16 image data
# syslinux-4.05/mime/image/x-lss16.xml
!:mime image/x-lss16

42
magic/lisp Normal file
View file

@ -0,0 +1,42 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File$
# lisp: file(1) magic for lisp programs
#
# various lisp types, from Daniel Quinlan (quinlan@yggdrasil.com)
# updated by Joerg Jenderek
# GRR: This lot is too weak
#0 string ;;
# windows INF files often begin with semicolon and use CRLF as line end
# lisp files are mainly created on unix system with LF as line end
#>2 search/4096 !\r Lisp/Scheme program text
#>2 search/4096 \r Windows INF file
0 search/4096 (setq\ Lisp/Scheme program text
!:mime text/x-lisp
0 search/4096 (defvar\ Lisp/Scheme program text
!:mime text/x-lisp
0 search/4096 (defparam\ Lisp/Scheme program text
!:mime text/x-lisp
0 search/4096 (defun\ Lisp/Scheme program text
!:mime text/x-lisp
0 search/4096 (autoload\ Lisp/Scheme program text
!:mime text/x-lisp
0 search/4096 (custom-set-variables\ Lisp/Scheme program text
!:mime text/x-lisp
# Emacs 18 - this is always correct, but not very magical.
0 string \012( Emacs v18 byte-compiled Lisp data
!:mime application/x-elc
# Emacs 19+ - ver. recognition added by Ian Springer
# Also applies to XEmacs 19+ .elc files; could tell them apart with regexs
# - Chris Chittleborough <cchittleborough@yahoo.com.au>
0 string ;ELC
>4 byte >18
>4 byte <32 Emacs/XEmacs v%d byte-compiled Lisp data
!:mime application/x-elc
# From: David Allouche <david@allouche.net>
0 search/1 \<TeXmacs| TeXmacs document text
!:mime text/texmacs

17
magic/lua Normal file
View file

@ -0,0 +1,17 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File: lua,v 1.5 2009/09/19 16:28:10 christos Exp $
# lua: file(1) magic for Lua scripting language
# URL: http://www.lua.org/
# From: Reuben Thomas <rrt@sc3d.org>, Seo Sanghyeon <tinuviel@sparcs.kaist.ac.kr>
# Lua scripts
0 search/1/w #!\ /usr/bin/lua Lua script text executable
!:mime text/x-lua
0 search/1/w #!\ /usr/local/bin/lua Lua script text executable
!:mime text/x-lua
0 search/1 #!/usr/bin/env\ lua Lua script text executable
!:mime text/x-lua
0 search/1 #!\ /usr/bin/env\ lua Lua script text executable
!:mime text/x-lua

7
magic/m4 Normal file
View file

@ -0,0 +1,7 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File$
# m4: file(1) magic for M4 scripts
#
0 regex \^dnl\ M4 macro processor script text
!:mime text/x-m4

21
magic/macintosh Normal file
View file

@ -0,0 +1,21 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File: macintosh,v 1.21 2010/09/20 19:19:17 rrt Exp $
# macintosh description
#
# BinHex is the Macintosh ASCII-encoded file format (see also "apple")
# Daniel Quinlan, quinlan@yggdrasil.com
11 string must\ be\ converted\ with\ BinHex BinHex binary text
!:mime application/mac-binhex40
# Stuffit archives are the de facto standard of compression for Macintosh
# files obtained from most archives. (franklsm@tuns.ca)
0 string SIT! StuffIt Archive (data)
!:mime application/x-stuffit
!:apple SIT!SIT!
# Newer StuffIt archives (grant@netbsd.org)
0 string StuffIt StuffIt Archive
!:mime application/x-stuffit
!:apple SIT!SIT!
#>162 string >0 : %s

35
magic/mail.news Normal file
View file

@ -0,0 +1,35 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File: mail.news,v 1.21 2012/06/21 01:44:52 christos Exp $
# mail.news: file(1) magic for mail and news
#
# Unfortunately, saved netnews also has From line added in some news software.
#0 string From mail text
0 string/t Relay-Version: old news text
!:mime message/rfc822
0 string/t #!\ rnews batched news text
!:mime message/rfc822
0 string/t N#!\ rnews mailed, batched news text
!:mime message/rfc822
0 string/t Forward\ to mail forwarding text
!:mime message/rfc822
0 string/t Pipe\ to mail piping text
!:mime message/rfc822
0 string/tc delivered-to: SMTP mail text
!:mime message/rfc822
0 string/tc return-path: SMTP mail text
!:mime message/rfc822
0 string/t Path: news text
!:mime message/news
0 string/t Xref: news text
!:mime message/news
0 string/t From: news or mail text
!:mime message/rfc822
0 string/t Article saved news text
!:mime message/news
0 string/t Received: RFC 822 mail text
!:mime message/rfc822
# TNEF files...
0 lelong 0x223E9F78 Transport Neutral Encapsulation Format
!:mime application/vnd.ms-tnef

16
magic/make Normal file
View file

@ -0,0 +1,16 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File$
# make: file(1) magic for makefiles
#
0 regex \^CFLAGS makefile script text
!:mime text/x-makefile
0 regex \^LDFLAGS makefile script text
!:mime text/x-makefile
0 regex \^all: makefile script text
!:mime text/x-makefile
0 regex \^.PRECIOUS makefile script text
!:mime text/x-makefile
0 regex \^SUBDIRS automake makefile script text
!:mime text/x-makefile

29
magic/marc21 Normal file
View file

@ -0,0 +1,29 @@
# See COPYING file in this directory for original libmagic copyright.
#--------------------------------------------
# marc21: file(1) magic for MARC 21 Format
#
# Kevin Ford (kefo@loc.gov)
#
# MARC21 formats are for the representation and communication
# of bibliographic and related information in machine-readable
# form. For more info, see http://www.loc.gov/marc/
# leader position 20-21 must be 45
# Gate rule: every MARC21 test below is a continuation (">") of this test.
20 string 45
# leader starts with 5 digits, followed by codes specific to MARC format
>0 regex/1 (^[0-9]{5})[acdnp][^bhlnqsu-z] MARC21 Bibliographic
!:mime application/marc
>0 regex/1 (^[0-9]{5})[acdnosx][z] MARC21 Authority
!:mime application/marc
>0 regex/1 (^[0-9]{5})[cdn][uvxy] MARC21 Holdings
!:mime application/marc
# Must be a continuation like its siblings: written as a level-0 rule it
# would run without the "45" gate and capture the tests that follow it.
>0 regex/1 (^[0-9]{5})[acdn][w] MARC21 Classification
!:mime application/marc
>0 regex/1 (^[0-9]{5})[cdn][q] MARC21 Community
!:mime application/marc
# leader position 22-23, should be "00" but is it?
>0 regex/1 (^.{21})([^0]{2}) (non-conforming)
!:mime application/marc

17
magic/matroska Normal file
View file

@ -0,0 +1,17 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File: matroska,v 1.7 2012/08/26 10:06:15 christos Exp $
# matroska: file(1) magic for Matroska files
#
# See http://www.matroska.org/
#
# EBML id:
0 belong 0x1a45dfa3
# DocType id:
>4 search/4096 \x42\x82
# DocType contents:
>>&1 string webm WebM
!:mime video/webm
>>&1 string matroska Matroska data
!:mime video/x-matroska

9
magic/misctools Normal file
View file

@ -0,0 +1,9 @@
# See COPYING file in this directory for original libmagic copyright.
#-----------------------------------------------------------------------------
# $File: misctools,v 1.12 2010/09/29 18:36:49 rrt Exp $
# misctools: file(1) magic for miscellaneous UNIX tools.
#
0 string/c BEGIN:VCALENDAR vCalendar calendar file
!:mime text/calendar
0 string/c BEGIN:VCARD vCard visiting card
!:mime text/x-vcard

368
magic/msdos Normal file
View file

@ -0,0 +1,368 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File: msdos,v 1.84 2013/02/05 13:55:22 christos Exp $
# msdos: file(1) magic for MS-DOS files
#
# .BAT files (Daniel Quinlan, quinlan@yggdrasil.com)
# updated by Joerg Jenderek at Oct 2008,Apr 2011
0 string/t @
>1 string/cW \ echo\ off DOS batch file text
!:mime text/x-msdos-batch
>1 string/cW echo\ off DOS batch file text
!:mime text/x-msdos-batch
>1 string/cW rem DOS batch file text
!:mime text/x-msdos-batch
>1 string/cW set\ DOS batch file text
!:mime text/x-msdos-batch
# Tests for various EXE types.
#
# Many of the compressed formats were extracted from IDARC 1.23 source code.
#
0 string/b MZ DOS MZ
!:mime application/x-dosexec
# All non-DOS EXE extensions have the relocation table more than 0x40 bytes into the file.
>0x18 leshort <0x40 MS-DOS executable
# These traditional tests usually work but not always. When test quality support is
# implemented these can be turned on.
#>>0x18 leshort 0x1c (Borland compiler)
#>>0x18 leshort 0x1e (MS compiler)
# If the relocation table is 0x40 or more bytes into the file, it's definitely
# not a DOS EXE.
>0x18 leshort >0x3f
# Maybe it's a PE?
>>(0x3c.l) string PE\0\0 PE
>>>(0x3c.l+24) leshort 0x010b \b32 executable
>>>(0x3c.l+24) leshort 0x020b \b32+ executable
>>>(0x3c.l+24) leshort 0x0107 ROM image
>>>(0x3c.l+24) default x Unknown PE signature
>>>>&0 leshort x 0x%x
>>>(0x3c.l+22) leshort&0x2000 >0 (DLL)
>>>(0x3c.l+92) leshort 1 (native)
>>>(0x3c.l+92) leshort 2 (GUI)
>>>(0x3c.l+92) leshort 3 (console)
>>>(0x3c.l+92) leshort 7 (POSIX)
>>>(0x3c.l+92) leshort 9 (Windows CE)
>>>(0x3c.l+92) leshort 10 (EFI application)
>>>(0x3c.l+92) leshort 11 (EFI boot service driver)
>>>(0x3c.l+92) leshort 12 (EFI runtime driver)
>>>(0x3c.l+92) leshort 13 (EFI ROM)
>>>(0x3c.l+92) leshort 14 (XBOX)
>>>(0x3c.l+92) leshort 15 (Windows boot application)
>>>(0x3c.l+92) default x (Unknown subsystem
>>>>&0 leshort x 0x%x)
>>>(0x3c.l+4) leshort 0x14c Intel 80386
>>>(0x3c.l+4) leshort 0x166 MIPS R4000
>>>(0x3c.l+4) leshort 0x168 MIPS R10000
>>>(0x3c.l+4) leshort 0x184 Alpha
>>>(0x3c.l+4) leshort 0x1a2 Hitachi SH3
>>>(0x3c.l+4) leshort 0x1a6 Hitachi SH4
>>>(0x3c.l+4) leshort 0x1c0 ARM
>>>(0x3c.l+4) leshort 0x1c2 ARM Thumb
>>>(0x3c.l+4) leshort 0x1c4 ARMv7 Thumb
>>>(0x3c.l+4) leshort 0x1f0 PowerPC
>>>(0x3c.l+4) leshort 0x200 Intel Itanium
>>>(0x3c.l+4) leshort 0x266 MIPS16
>>>(0x3c.l+4) leshort 0x268 Motorola 68000
>>>(0x3c.l+4) leshort 0x290 PA-RISC
>>>(0x3c.l+4) leshort 0x366 MIPSIV
>>>(0x3c.l+4) leshort 0x466 MIPS16 with FPU
>>>(0x3c.l+4) leshort 0xebc EFI byte code
>>>(0x3c.l+4) leshort 0x8664 x86-64
>>>(0x3c.l+4) leshort 0xc0ee MSIL
>>>(0x3c.l+4) default x Unknown processor type
>>>>&0 leshort x 0x%x
>>>(0x3c.l+22) leshort&0x0200 >0 (stripped to external PDB)
>>>(0x3c.l+22) leshort&0x1000 >0 system file
>>>(0x3c.l+24) leshort 0x010b
>>>>(0x3c.l+232) lelong >0 Mono/.Net assembly
>>>(0x3c.l+24) leshort 0x020b
>>>>(0x3c.l+248) lelong >0 Mono/.Net assembly
# hooray, there's a DOS extender using the PE format, with a valid PE
# executable inside (which just prints a message and exits if run in win)
>>>(8.s*16) string 32STUB \b, 32rtm DOS extender
>>>(8.s*16) string !32STUB \b, for MS Windows
>>>(0x3c.l+0xf8) string UPX0 \b, UPX compressed
>>>(0x3c.l+0xf8) search/0x140 PEC2 \b, PECompact2 compressed
>>>(0x3c.l+0xf8) search/0x140 UPX2
>>>>(&0x10.l+(-4)) string PK\3\4 \b, ZIP self-extracting archive (Info-Zip)
>>>(0x3c.l+0xf8) search/0x140 .idata
>>>>(&0xe.l+(-4)) string PK\3\4 \b, ZIP self-extracting archive (Info-Zip)
>>>>(&0xe.l+(-4)) string ZZ0 \b, ZZip self-extracting archive
>>>>(&0xe.l+(-4)) string ZZ1 \b, ZZip self-extracting archive
>>>(0x3c.l+0xf8) search/0x140 .rsrc
>>>>(&0x0f.l+(-4)) string a\\\4\5 \b, WinHKI self-extracting archive
>>>>(&0x0f.l+(-4)) string Rar! \b, RAR self-extracting archive
>>>>(&0x0f.l+(-4)) search/0x3000 MSCF \b, InstallShield self-extracting archive
>>>>(&0x0f.l+(-4)) search/32 Nullsoft \b, Nullsoft Installer self-extracting archive
>>>(0x3c.l+0xf8) search/0x140 .data
>>>>(&0x0f.l) string WEXTRACT \b, MS CAB-Installer self-extracting archive
>>>(0x3c.l+0xf8) search/0x140 .petite\0 \b, Petite compressed
>>>>(0x3c.l+0xf7) byte x
>>>>>(&0x104.l+(-4)) string =!sfx! \b, ACE self-extracting archive
>>>(0x3c.l+0xf8) search/0x140 .WISE \b, WISE installer self-extracting archive
>>>(0x3c.l+0xf8) search/0x140 .dz\0\0\0 \b, Dzip self-extracting archive
>>>&(0x3c.l+0xf8) search/0x100 _winzip_ \b, ZIP self-extracting archive (WinZip)
>>>&(0x3c.l+0xf8) search/0x100 SharedD \b, Microsoft Installer self-extracting archive
>>>0x30 string Inno \b, InnoSetup self-extracting archive
# Hmm, not a PE but the relocation table is too high for a traditional DOS exe,
# must be one of the unusual subformats.
>>(0x3c.l) string !PE\0\0 MS-DOS executable
>>(0x3c.l) string NE \b, NE
>>>(0x3c.l+0x36) byte 1 for OS/2 1.x
>>>(0x3c.l+0x36) byte 2 for MS Windows 3.x
>>>(0x3c.l+0x36) byte 3 for MS-DOS
>>>(0x3c.l+0x36) byte 4 for Windows 386
>>>(0x3c.l+0x36) byte 5 for Borland Operating System Services
>>>(0x3c.l+0x36) default x
>>>>(0x3c.l+0x36) byte x (unknown OS %x)
>>>(0x3c.l+0x36) byte 0x81 for MS-DOS, Phar Lap DOS extender
>>>(0x3c.l+0x0c) leshort&0x8003 0x8002 (DLL)
>>>(0x3c.l+0x0c) leshort&0x8003 0x8001 (driver)
>>>&(&0x24.s-1) string ARJSFX \b, ARJ self-extracting archive
>>>(0x3c.l+0x70) search/0x80 WinZip(R)\ Self-Extractor \b, ZIP self-extracting archive (WinZip)
>>(0x3c.l) string LX\0\0 \b, LX
>>>(0x3c.l+0x0a) leshort <1 (unknown OS)
>>>(0x3c.l+0x0a) leshort 1 for OS/2
>>>(0x3c.l+0x0a) leshort 2 for MS Windows
>>>(0x3c.l+0x0a) leshort 3 for DOS
>>>(0x3c.l+0x0a) leshort >3 (unknown OS)
>>>(0x3c.l+0x10) lelong&0x28000 =0x8000 (DLL)
>>>(0x3c.l+0x10) lelong&0x20000 >0 (device driver)
>>>(0x3c.l+0x10) lelong&0x300 0x300 (GUI)
>>>(0x3c.l+0x10) lelong&0x28300 <0x300 (console)
>>>(0x3c.l+0x08) leshort 1 i80286
>>>(0x3c.l+0x08) leshort 2 i80386
>>>(0x3c.l+0x08) leshort 3 i80486
>>>(8.s*16) string emx \b, emx
>>>>&1 string x %s
>>>&(&0x54.l-3) string arjsfx \b, ARJ self-extracting archive
# MS Windows system file, supposedly a collection of LE executables
>>(0x3c.l) string W3 \b, W3 for MS Windows
>>(0x3c.l) string LE\0\0 \b, LE executable
>>>(0x3c.l+0x0a) leshort 1
# some DOS extenders use LE files with OS/2 header
>>>>0x240 search/0x100 DOS/4G for MS-DOS, DOS4GW DOS extender
>>>>0x240 search/0x200 WATCOM\ C/C++ for MS-DOS, DOS4GW DOS extender
>>>>0x440 search/0x100 CauseWay\ DOS\ Extender for MS-DOS, CauseWay DOS extender
>>>>0x40 search/0x40 PMODE/W for MS-DOS, PMODE/W DOS extender
>>>>0x40 search/0x40 STUB/32A for MS-DOS, DOS/32A DOS extender (stub)
>>>>0x40 search/0x80 STUB/32C for MS-DOS, DOS/32A DOS extender (configurable stub)
>>>>0x40 search/0x80 DOS/32A for MS-DOS, DOS/32A DOS extender (embedded)
# this is a wild guess; hopefully it is a specific signature
>>>>&0x24 lelong <0x50
>>>>>(&0x4c.l) string \xfc\xb8WATCOM
>>>>>>&0 search/8 3\xdbf\xb9 \b, 32Lite compressed
# another wild guess: if real OS/2 LE executables exist, they probably have higher start EIP
#>>>>(0x3c.l+0x1c) lelong >0x10000 for OS/2
# fails with DOS-Extenders.
>>>(0x3c.l+0x0a) leshort 2 for MS Windows
>>>(0x3c.l+0x0a) leshort 3 for DOS
>>>(0x3c.l+0x0a) leshort 4 for MS Windows (VxD)
>>>(&0x7c.l+0x26) string UPX \b, UPX compressed
>>>&(&0x54.l-3) string UNACE \b, ACE self-extracting archive
# looks like ASCII, probably some embedded copyright message.
# and definitely not NE/LE/LX/PE
>>0x3c lelong >0x20000000
>>>(4.s*512) leshort !0x014c \b, MZ for MS-DOS
# header data too small for extended executable
>2 long !0
>>0x18 leshort <0x40
>>>(4.s*512) leshort !0x014c
>>>>&(2.s-514) string !LE
>>>>>&-2 string !BW \b, MZ for MS-DOS
>>>>&(2.s-514) string LE \b, LE
>>>>>0x240 search/0x100 DOS/4G for MS-DOS, DOS4GW DOS extender
# educated guess since indirection is still not capable enough for complex offset
# calculations (next embedded executable would be at &(&2*512+&0-2))
# I suspect there are only LE executables in these multi-exe files
>>>>&(2.s-514) string BW
>>>>>0x240 search/0x100 DOS/4G ,\b LE for MS-DOS, DOS4GW DOS extender (embedded)
>>>>>0x240 search/0x100 !DOS/4G ,\b BW collection for MS-DOS
# This sequence skips to the first COFF segment, usually .text
>(4.s*512) leshort 0x014c \b, COFF
>>(8.s*16) string go32stub for MS-DOS, DJGPP go32 DOS extender
>>(8.s*16) string emx
>>>&1 string x for DOS, Win or OS/2, emx %s
>>&(&0x42.l-3) byte x
>>>&0x26 string UPX \b, UPX compressed
# and yet another guess: small .text, and after large .data is unusual, could be 32lite
>>&0x2c search/0xa0 .text
>>>&0x0b lelong <0x2000
>>>>&0 lelong >0x6000 \b, 32lite compressed
>(8.s*16) string $WdX \b, WDos/X DOS extender
# By now an executable type should have been printed out. The executable
# may be a self-uncompressing archive, so look for evidence of that and
# print it out.
#
# Some signatures below from Greg Roelofs, newt@uchicago.edu.
#
>0x35 string \x8e\xc0\xb9\x08\x00\xf3\xa5\x4a\x75\xeb\x8e\xc3\x8e\xd8\x33\xff\xbe\x30\x00\x05 \b, aPack compressed
>0xe7 string LH/2\ Self-Extract \b, %s
>0x1c string UC2X \b, UCEXE compressed
>0x1c string WWP\ \b, WWPACK compressed
>0x1c string RJSX \b, ARJ self-extracting archive
>0x1c string diet \b, diet compressed
>0x1c string LZ09 \b, LZEXE v0.90 compressed
>0x1c string LZ91 \b, LZEXE v0.91 compressed
>0x1c string tz \b, TinyProg compressed
>0x1e string Copyright\ 1989-1990\ PKWARE\ Inc. Self-extracting PKZIP archive
!:mime application/zip
# Yes, this really is "Copr", not "Corp."
>0x1e string PKLITE\ Copr. Self-extracting PKZIP archive
!:mime application/zip
# winarj stores a message in the stub instead of the sig in the MZ header
>0x20 search/0xe0 aRJsfX \b, ARJ self-extracting archive
>0x20 string AIN
>>0x23 string 2 \b, AIN 2.x compressed
>>0x23 string <2 \b, AIN 1.x compressed
>>0x23 string >2 \b, AIN 1.x compressed
>0x24 string LHa's\ SFX \b, LHa self-extracting archive
!:mime application/x-lha
>0x24 string LHA's\ SFX \b, LHa self-extracting archive
!:mime application/x-lha
>0x24 string \ $ARX \b, ARX self-extracting archive
>0x24 string \ $LHarc \b, LHarc self-extracting archive
>0x20 string SFX\ by\ LARC \b, LARC self-extracting archive
>0x40 string aPKG \b, aPackage self-extracting archive
>0x64 string W\ Collis\0\0 \b, Compack compressed
>0x7a string Windows\ self-extracting\ ZIP \b, ZIP self-extracting archive
>>&0xf4 search/0x140 \x0\x40\x1\x0
>>>(&0.l+(4)) string MSCF \b, WinHKI CAB self-extracting archive
>1638 string -lh5- \b, LHa self-extracting archive v2.13S
>0x17888 string Rar! \b, RAR self-extracting archive
# Skip to the end of the EXE. This will usually work fine in the PE case
# because the MZ image is hardcoded into the toolchain and almost certainly
# won't match any of these signatures.
>(4.s*512) long x
>>&(2.s-517) byte x
>>>&0 string PK\3\4 \b, ZIP self-extracting archive
>>>&0 string Rar! \b, RAR self-extracting archive
>>>&0 string =!\x11 \b, AIN 2.x self-extracting archive
>>>&0 string =!\x12 \b, AIN 2.x self-extracting archive
>>>&0 string =!\x17 \b, AIN 1.x self-extracting archive
>>>&0 string =!\x18 \b, AIN 1.x self-extracting archive
>>>&7 search/400 **ACE** \b, ACE self-extracting archive
>>>&0 search/0x480 UC2SFX\ Header \b, UC2 self-extracting archive
# a few unknown ZIP sfxes, no idea if they are needed or if they are
# already captured by the generic patterns above
>(8.s*16) search/0x20 PKSFX \b, ZIP self-extracting archive (PKZIP)
# TODO: how to add this? >FileSize-34 string Windows\ Self-Installing\ Executable \b, ZIP self-extracting archive
#
# TELVOX Teleinformatica CODEC self-extractor for OS/2:
>49801 string \x79\xff\x80\xff\x76\xff \b, CODEC archive v3.21
>>49824 leshort =1 \b, 1 file
>>49824 leshort >1 \b, %u files
# Popular applications
2080 string Microsoft\ Word\ 6.0\ Document %s
!:mime application/msword
2080 string Documento\ Microsoft\ Word\ 6 Spanish Microsoft Word 6 document data
!:mime application/msword
# Pawel Wiecek <coven@i17linuxb.ists.pwr.wroc.pl> (for polish Word)
2112 string MSWordDoc Microsoft Word document data
!:mime application/msword
#
0 belong 0x31be0000 Microsoft Word Document
!:mime application/msword
#
0 string/b PO^Q` Microsoft Word 6.0 Document
!:mime application/msword
#
0 string/b \376\067\0\043 Microsoft Office Document
!:mime application/msword
0 string/b \333\245-\0\0\0 Microsoft Office Document
!:mime application/msword
512 string/b \354\245\301 Microsoft Word Document
!:mime application/msword
#
0 string/b \xDB\xA5\x2D\x00 Microsoft WinWord 2.0 Document
!:mime application/msword
#
2080 string Microsoft\ Excel\ 5.0\ Worksheet %s
!:mime application/vnd.ms-excel
#
0 string/b \xDB\xA5\x2D\x00 Microsoft WinWord 2.0 Document
!:mime application/msword
2080 string Foglio\ di\ lavoro\ Microsoft\ Exce %s
!:mime application/vnd.ms-excel
#
# Pawel Wiecek <coven@i17linuxb.ists.pwr.wroc.pl> (for polish Excel)
2114 string Biff5 Microsoft Excel 5.0 Worksheet
!:mime application/vnd.ms-excel
# Italian MS-Excel
2121 string Biff5 Microsoft Excel 5.0 Worksheet
!:mime application/vnd.ms-excel
0 string/b \x09\x04\x06\x00\x00\x00\x10\x00 Microsoft Excel Worksheet
!:mime application/vnd.ms-excel
#
0 belong 0x00001a00 Lotus 1-2-3
!:mime application/x-123
#
0 belong 0x00000200 Lotus 1-2-3
!:mime application/x-123
0 string/b WordPro\0 Lotus WordPro
!:mime application/vnd.lotus-wordpro
0 string/b WordPro\r\373 Lotus WordPro
!:mime application/vnd.lotus-wordpro
# Windows icons (Ian Springer <ips@fpk.hp.com>)
0 string/b \000\000\001\000 MS Windows icon resource
!:mime image/x-icon
# .PIF files added by Joerg Jenderek from http://smsoft.ru/en/pifdoc.htm
# only for windows versions equal or greater 3.0
0x171 string MICROSOFT\ PIFEX\0 Windows Program Information File
!:mime application/x-dosexec
# TNEF magic From "Joomy" <joomy@se-ed.net>
# Microsoft Outlook's Transport Neutral Encapsulation Format (TNEF)
# The signature is a 32-bit little-endian value, so it must be tested with
# lelong; a 16-bit leshort test cannot compare all four signature bytes.
0 lelong 0x223e9f78 TNEF
!:mime application/vnd.ms-tnef
#------------------------------------------------------------------------------
# From Stuart Caie <kyzer@4u.net> (developer of cabextract)
# Microsoft Cabinet files
0 string/b MSCF\0\0\0\0 Microsoft Cabinet archive data
!:mime application/vnd.ms-cab-compressed
# from http://filext.com by Derek M Jones <derek@knosof.co.uk>
# False positive with PPT (also currently this string is too long)
#0 string/b \xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x3E\x00\x03\x00\xFE\xFF\x09\x00\x06 Microsoft Installer
0 string/b \320\317\021\340\241\261\032\341 Microsoft Office Document
#>48 byte 0x1B Excel Document
#!:mime application/vnd.ms-excel
>546 string bjbj Microsoft Word Document
!:mime application/msword
>546 string jbjb Microsoft Word Document
!:mime application/msword
0 string/b \224\246\056 Microsoft Word Document
!:mime application/msword
512 string R\0o\0o\0t\0\ \0E\0n\0t\0r\0y Microsoft Word Document
!:mime application/msword
# MS eBook format (.lit)
0 string/b ITOLITLS Microsoft Reader eBook Data
>8 lelong x \b, version %u
!:mime application/x-ms-reader

12
magic/neko Normal file
View file

@ -0,0 +1,12 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------
# $File: java,v 1.12 2009/09/19 16:28:10 christos Exp $
# From: Mikhail Gusarov <dottedmag@dottedmag.net>
# NekoVM (http://nekovm.org/) bytecode
0 string NEKO NekoVM bytecode
>4 lelong x (%d global symbols,
>8 lelong x %d global fields,
>12 lelong x %d bytecode ops)
!:mime application/x-nekovm-bytecode

11
magic/pascal Normal file
View file

@ -0,0 +1,11 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File$
# pascal: file(1) magic for Pascal source
#
0 search/8192 (input, Pascal source text
!:mime text/x-pascal
0 regex \^program Pascal source text
!:mime text/x-pascal
0 regex \^record Pascal source text
!:mime text/x-pascal

8
magic/pdf Normal file
View file

@ -0,0 +1,8 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File$
# pdf: file(1) magic for Portable Document Format
#
0 string %PDF- PDF document
!:mime application/pdf

26
magic/perl Normal file
View file

@ -0,0 +1,26 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File: perl,v 1.19 2012/06/20 21:16:25 christos Exp $
# perl: file(1) magic for Larry Wall's perl language.
#
# The `eval' lines recognizes an outrageously clever hack.
# Keith Waclena <keith@cerberus.uchicago.edu>
# Send additions to <perl5-porters@perl.org>
0 search/1/w #!\ /bin/perl Perl script text executable
!:mime text/x-perl
0 search/1 eval\ "exec\ /bin/perl Perl script text
!:mime text/x-perl
0 search/1/w #!\ /usr/bin/perl Perl script text executable
!:mime text/x-perl
0 search/1 eval\ "exec\ /usr/bin/perl Perl script text
!:mime text/x-perl
0 search/1/w #!\ /usr/local/bin/perl Perl script text executable
!:mime text/x-perl
0 search/1 eval\ "exec\ /usr/local/bin/perl Perl script text
!:mime text/x-perl
0 search/1 eval\ '(exit\ $?0)'\ &&\ eval\ 'exec Perl script text
!:mime text/x-perl
0 search/1 #!/usr/bin/env\ perl Perl script text executable
!:mime text/x-perl
0 search/1 #!\ /usr/bin/env\ perl Perl script text executable
!:mime text/x-perl

27
magic/pgp Normal file
View file

@ -0,0 +1,27 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File$
# pgp: file(1) magic for Pretty Good Privacy
# see http://lists.gnupg.org/pipermail/gnupg-devel/1999-September/016052.html
#
0 beshort 0x9900 PGP key public ring
!:mime application/x-pgp-keyring
0 beshort 0x9501 PGP key security ring
!:mime application/x-pgp-keyring
0 beshort 0x9500 PGP key security ring
!:mime application/x-pgp-keyring
0 beshort 0xa600 PGP encrypted data
#!:mime application/pgp-encrypted
#0 string -----BEGIN\040PGP text/PGP armored data
# NOTE(review): the armored-data rule above is commented out, so the !:mime
# line below attaches to the "PGP encrypted data" test instead. It looks
# like it was meant for the armored rule -- confirm the intended MIME type.
!:mime text/PGP # encoding: armored data
#>15 string PUBLIC\040KEY\040BLOCK- public key block
#>15 string MESSAGE- message
#>15 string SIGNED\040MESSAGE- signed message
#>15 string PGP\040SIGNATURE- signature
2 string ---BEGIN\ PGP\ PUBLIC\ KEY\ BLOCK- PGP public key block
!:mime application/pgp-keys
0 string -----BEGIN\040PGP\40MESSAGE- PGP message
!:mime application/pgp
0 string -----BEGIN\040PGP\40SIGNATURE- PGP signature
!:mime application/pgp-signature

7
magic/pkgadd Normal file
View file

@ -0,0 +1,7 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File$
# pkgadd: file(1) magic for SysV R4 PKG Datastreams
#
0 string #\ PaCkAgE\ DaTaStReAm pkg Datastream (SVR4)
!:mime application/x-svr4-package

14
magic/printer Normal file
View file

@ -0,0 +1,14 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File: printer,v 1.24 2011/05/08 16:34:51 christos Exp $
# printer: file(1) magic for printer-formatted files
#
# PostScript, updated by Daniel Quinlan (quinlan@yggdrasil.com)
0 string %! PostScript document text
!:mime application/postscript
!:apple ASPSTEXT
# Some PCs have the annoying habit of adding a ^D as a document separator
0 string \004%! PostScript document text
!:mime application/postscript
!:apple ASPSTEXT

46
magic/python Normal file
View file

@ -0,0 +1,46 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File: python,v 1.21 2012/06/21 01:12:51 christos Exp $
# python: file(1) magic for python
#
0 search/1/w #!\ /usr/bin/python Python script text executable
!:mime text/x-python
0 search/1/w #!\ /usr/local/bin/python Python script text executable
!:mime text/x-python
0 search/1 #!/usr/bin/env\ python Python script text executable
!:mime text/x-python
0 search/1 #!\ /usr/bin/env\ python Python script text executable
!:mime text/x-python
# from module.submodule import func1, func2
0 regex \^from\\s+(\\w|\\.)+\\s+import.*$ Python script text executable
!:mime text/x-python
# def __init__ (self, ...):
0 search/4096 def\ __init__
>&0 search/64 self Python script text executable
!:mime text/x-python
# comments
0 search/4096 '''
>&0 regex .*'''$ Python script text executable
!:mime text/x-python
0 search/4096 """
>&0 regex .*"""$ Python script text executable
!:mime text/x-python
# try:
# except: or finally:
# block
0 search/4096 try:
>&0 regex \^\\s*except.*: Python script text executable
!:mime text/x-python
>&0 search/4096 finally: Python script text executable
!:mime text/x-python
# def name(args, args):
0 regex \^(\ |\\t)*def\ +[a-zA-Z]+
>&0 regex \ *\\(([a-zA-Z]|,|\ )*\\):$ Python script text executable
!:mime text/x-python

36
magic/riff Normal file
View file

@ -0,0 +1,36 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File: riff,v 1.22 2011/09/06 11:00:06 christos Exp $
# riff: file(1) magic for RIFF format
# See
#
# http://www.seanet.com/users/matts/riffmci/riffmci.htm
#
# AVI section extended by Patrik Radman <patrik+file-magic@iki.fi>
#
0 string RIFF RIFF (little-endian) data
# Microsoft WAVE format (*.wav)
>8 string WAVE \b, WAVE audio
!:mime audio/x-wav
# Corel Draw Picture
>8 string CDRA \b, Corel Draw Picture
!:mime image/x-coreldraw
# AVI == Audio Video Interleave
>8 string AVI\040 \b, AVI
!:mime video/x-msvideo
#------------------------------------------------------------------------------
# Sony Wave64
# see http://www.vcs.de/fileadmin/user_upload/MBS/PDF/Whitepaper/Informations_about_Sony_Wave64.pdf
# 128 bit RIFF-GUID { 66666972-912E-11CF-A5D6-28DB04C10000 } in little-endian
0 string riff\x2E\x91\xCF\x11\xA5\xD6\x28\xDB\x04\xC1\x00\x00 Sony Wave64 RIFF data
# 128 bit + total file size (64 bits) so 24 bytes
# then WAVE-GUID { 65766177-ACF3-11D3-8CD1-00C04F8EDB8A }
>24 string wave\xF3\xAC\xD3\x11\x8C\xD1\x00\xC0\x4F\x8E\xDB\x8A \b, WAVE 64 audio
!:mime audio/x-w64
#------------------------------------------------------------------------------
# MBWF/RF64
# see EBU TECH 3306 http://tech.ebu.ch/docs/tech/tech3306-2009.pdf
0 string RF64\xff\xff\xff\xffWAVEds64 MBWF/RF64 audio
!:mime audio/x-wav

12
magic/rpm Normal file
View file

@ -0,0 +1,12 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File: rpm,v 1.11 2011/06/14 12:47:41 christos Exp $
#
# RPM: file(1) magic for Red Hat Packages Erik Troan (ewt@redhat.com)
#
0 belong 0xedabeedb RPM
!:mime application/x-rpm
#delta RPM Daniel Novotny (dnovotny@redhat.com)
0 string drpm Delta RPM
!:mime application/x-rpm

9
magic/rtf Normal file
View file

@ -0,0 +1,9 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File$
# rtf: file(1) magic for Rich Text Format (RTF)
#
# Duncan P. Simpson, D.P.Simpson@dcs.warwick.ac.uk
#
0 string {\\rtf Rich Text Format data,
!:mime text/rtf

28
magic/ruby Normal file
View file

@ -0,0 +1,28 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File: ruby,v 1.4 2010/07/08 20:24:13 christos Exp $
# ruby: file(1) magic for Ruby scripting language
# URL: http://www.ruby-lang.org/
# From: Reuben Thomas <rrt@sc3d.org>
# Ruby scripts
0 search/1/w #!\ /usr/bin/ruby Ruby script text executable
!:mime text/x-ruby
0 search/1/w #!\ /usr/local/bin/ruby Ruby script text executable
!:mime text/x-ruby
0 search/1 #!/usr/bin/env\ ruby Ruby script text executable
!:mime text/x-ruby
0 search/1 #!\ /usr/bin/env\ ruby Ruby script text executable
!:mime text/x-ruby
# What looks like ruby, but does not have a shebang
# (modules and such)
# From: Lubomir Rintel <lkundrak@v3.sk>
0 regex \^[\ \t]*require[\ \t]'[A-Za-z_/]+'
>0 regex include\ [A-Z]|def\ [a-z]|\ do$
>>0 regex \^[\ \t]*end([\ \t]*[;#].*)?$ Ruby script text
!:mime text/x-ruby
0 regex \^[\ \t]*(class|module)[\ \t][A-Z]
>0 regex (modul|includ)e\ [A-Z]|def\ [a-z]
>>0 regex \^[\ \t]*end([\ \t]*[;#].*)?$ Ruby module source text
!:mime text/x-ruby

7
magic/sc Normal file
View file

@ -0,0 +1,7 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File$
# sc: file(1) magic for "sc" spreadsheet
#
38 string Spreadsheet sc spreadsheet file
!:mime application/x-sc

82
magic/sgml Normal file
View file

@ -0,0 +1,82 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File: sgml,v 1.28 2012/04/28 21:20:26 christos Exp $
# Type: SVG Vectorial Graphics
# From: Noel Torres <tecnico@ejerciciosresueltos.com>
0 string \<?xml\ version="
>15 string >\0
>>19 search/4096 \<svg SVG Scalable Vector Graphics image
!:mime image/svg+xml
>>19 search/4096 \<gnc-v2 GnuCash file
!:mime application/x-gnucash
# Sitemap file
0 string/t \<?xml\ version="
>15 string >\0
>>19 search/4096 \<urlset XML Sitemap document text
!:mime application/xml-sitemap
# xhtml
0 string/t \<?xml\ version="
>15 string >\0
>>19 search/4096/cWbt \<!doctype\ html XHTML document text
!:mime text/html
0 string/t \<?xml\ version='
>15 string >\0
>>19 search/4096/cWbt \<!doctype\ html XHTML document text
!:mime text/html
0 string/t \<?xml\ version="
>15 string >\0
>>19 search/4096/cWbt \<html broken XHTML document text
!:mime text/html
#------------------------------------------------------------------------------
# sgml: file(1) magic for Standard Generalized Markup Language
# HyperText Markup Language (HTML) is an SGML document type,
# from Daniel Quinlan (quinlan@yggdrasil.com)
# adapted to string extensions by Anthon van der Neut <anthon@mnt.org>
0 search/4096/cWt \<!doctype\ html HTML document text
!:mime text/html
!:strength + 5
0 search/4096/cwt \<head HTML document text
!:mime text/html
!:strength + 5
0 search/4096/cwt \<title HTML document text
!:mime text/html
!:strength + 5
0 search/4096/cwt \<html HTML document text
!:mime text/html
!:strength + 5
0 search/4096/cwt \<script HTML document text
!:mime text/html
!:strength + 5
0 search/4096/cwt \<style HTML document text
!:mime text/html
!:strength + 5
0 search/4096/cwt \<table HTML document text
!:mime text/html
!:strength + 5
0 search/4096/cwt \<a\ href= HTML document text
!:mime text/html
!:strength + 5
# Extensible markup language (XML), a subset of SGML
# from Marc Prud'hommeaux (marc@apocalypse.org)
0 search/1/cwt \<?xml XML document text
!:mime application/xml
!:strength + 5
0 string/t \<?xml\ version\ " XML
!:mime application/xml
!:strength + 5
0 string/t \<?xml\ version=" XML
!:mime application/xml
!:strength + 5
0 string \<?xml\ version=' XML
!:mime application/xml
!:strength + 5
0 search/1/wbt \<?xml XML document text
!:mime application/xml
!:strength - 10
0 search/1/wt \<?XML broken XML document text
!:mime application/xml
!:strength - 10

17
magic/sniffer Normal file
View file

@ -0,0 +1,17 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# sniffer: file(1) magic for packet capture files
#
# From: guy@alum.mit.edu (Guy Harris)
#
#
# "libpcap" capture files.
# (We call them "tcpdump capture file(s)" for now, as "tcpdump" is
# the main program that uses that format, but there are other programs
# that use "libpcap", or that use the same capture file format.)
#
0 ubelong 0xa1b2c3d4 tcpdump capture file (big-endian)
!:mime application/vnd.tcpdump.pcap
0 ulelong 0xa1b2c3d4 tcpdump capture file (little-endian)
!:mime application/vnd.tcpdump.pcap

23
magic/tcl Normal file
View file

@ -0,0 +1,23 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# tcl: file(1) magic for Tcl scripting language
# URL: http://www.tcl.tk/
# From: gustaf neumann
# Tcl scripts
# A "#! /usr/bin/tcl" shebang identifies a Tcl script, so it must report the
# same text/x-tcl MIME type as every other Tcl rule in this file (it was
# previously mislabelled as text/x-lua).
0 search/1/w #!\ /usr/bin/tcl Tcl script text executable
!:mime text/x-tcl
0 search/1/w #!\ /usr/local/bin/tcl Tcl script text executable
!:mime text/x-tcl
0 search/1 #!/usr/bin/env\ tcl Tcl script text executable
!:mime text/x-tcl
0 search/1 #!\ /usr/bin/env\ tcl Tcl script text executable
!:mime text/x-tcl
0 search/1/w #!\ /usr/bin/wish Tcl/Tk script text executable
!:mime text/x-tcl
0 search/1/w #!\ /usr/local/bin/wish Tcl/Tk script text executable
!:mime text/x-tcl
0 search/1 #!/usr/bin/env\ wish Tcl/Tk script text executable
!:mime text/x-tcl
0 search/1 #!\ /usr/bin/env\ wish Tcl/Tk script text executable
!:mime text/x-tcl

56
magic/tex Normal file
View file

@ -0,0 +1,56 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File: tex,v 1.17 2010/09/20 19:19:17 rrt Exp $
# tex: file(1) magic for TeX files
#
# XXX - needs byte-endian stuff (big-endian and little-endian DVI?)
#
# From <conklin@talisman.kaleida.com>
# Although we may know the offset of certain text fields in TeX DVI
# and font files, we can't use them reliably because they are not
# zero terminated. [but we do anyway, christos]
0 string \367\002 TeX DVI file
!:mime application/x-dvi
# There is no way to detect TeX Font Metric (*.tfm) files without
# breaking them apart and reading the data. The following patterns
# match most *.tfm files generated by METAFONT or afm2tfm.
2 string \000\021 TeX font metric data
!:mime application/x-tex-tfm
2 string \000\022 TeX font metric data
!:mime application/x-tex-tfm
# Texinfo and GNU Info, from Daniel Quinlan (quinlan@yggdrasil.com)
0 search/1 \\input\ texinfo Texinfo source text
!:mime text/x-texinfo
0 search/1 This\ is\ Info\ file GNU Info text
!:mime text/x-info
# TeX documents, from Daniel Quinlan (quinlan@yggdrasil.com)
0 search/4096 \\input TeX document text
!:mime text/x-tex
!:strength + 15
0 search/4096 \\section LaTeX document text
!:mime text/x-tex
!:strength + 18
0 search/4096 \\setlength LaTeX document text
!:mime text/x-tex
!:strength + 15
0 search/4096 \\documentstyle LaTeX document text
!:mime text/x-tex
!:strength + 18
0 search/4096 \\chapter LaTeX document text
!:mime text/x-tex
!:strength + 18
0 search/4096 \\documentclass LaTeX 2e document text
!:mime text/x-tex
!:strength + 15
0 search/4096 \\relax LaTeX auxiliary file
!:mime text/x-tex
!:strength + 15
0 search/4096 \\contentsline LaTeX table of contents
!:mime text/x-tex
!:strength + 15
0 search/4096 %\ -*-latex-*- LaTeX document text
!:mime text/x-tex

22
magic/troff Normal file
View file

@ -0,0 +1,22 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File$
# troff: file(1) magic for *roff
#
# updated by Daniel Quinlan (quinlan@yggdrasil.com)
# troff input
0 search/1 .\\" troff or preprocessor input text
!:mime text/troff
0 search/1 '\\" troff or preprocessor input text
!:mime text/troff
0 search/1 '.\\" troff or preprocessor input text
!:mime text/troff
0 search/1 \\" troff or preprocessor input text
!:mime text/troff
0 search/1 ''' troff or preprocessor input text
!:mime text/troff
0 regex/20 \^\\.[A-Za-z0-9][A-Za-z0-9][\ \t] troff or preprocessor input text
!:mime text/troff
0 regex/20 \^\\.[A-Za-z0-9][A-Za-z0-9]$ troff or preprocessor input text
!:mime text/troff

26
magic/vorbis Normal file
View file

@ -0,0 +1,26 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File$
# vorbis: file(1) magic for Ogg/Vorbis files
#
# From Felix von Leitner <leitner@fefe.de>
# Extended by Beni Cherniavsky <cben@crosswinds.net>
# Further extended by Greg Wooledge <greg@wooledge.org>
#
# Most (everything but the number of channels and bitrate) is commented
# out with `##' as it's not interesting to the average user. The most
# probable things advanced users would want to uncomment are probably
# the number of comments and the encoder version.
#
# FIXME: The first match has been made a search, so that it can skip
# over prepended ID3 tags. This will work for MIME type detection, but
# won't work for detecting other properties of the file (they all need
# to be made relative to the search). In any case, if the file has ID3
# tags, the ID3 information will be printed, not the Ogg information,
# so until that's fixed, this doesn't matter.
# FIXME[2]: Disable the above for now, since search assumes text mode.
#
# --- Ogg Framing ---
#0 search/1000 OggS Ogg data
0 string OggS Ogg data
!:mime application/ogg

14
magic/warc Normal file
View file

@ -0,0 +1,14 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File: warc,v 1.2 2009/09/19 16:28:13 christos Exp $
# warc: file(1) magic for WARC files
0 string WARC/ WARC Archive
>5 string x version %.4s
!:mime application/warc
#------------------------------------------------------------------------------
# Arc File Format from Internet Archive
# see http://www.archive.org/web/researcher/ArcFileFormat.php
0 string filedesc:// Internet Archive File
!:mime application/x-ia-arc

19
magic/windows Normal file
View file

@ -0,0 +1,19 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File: windows,v 1.4 2009/09/19 16:28:13 christos Exp $
# windows: file(1) magic for Microsoft Windows
#
# This file is mainly reserved for files where programs
# using them are run almost always on MS Windows 3.x or
# above, or files only used exclusively in Windows OS,
# where there is no better category to allocate for.
# For example, even though WinZIP runs almost exclusively on Windows,
# it is better to treat its files as "archive" instead.
# For format usable in DOS, such as generic executable
# format, please specify under "msdos" file.
#
# From: Pal Tamas <folti@balabit.hu>
# Autorun File
# Case-insensitive match on the "[autorun]" section header followed by CRLF.
# The MIME type must not end in punctuation: '.' is a valid MIME-type
# character for the parser, so a trailing period would become part of the
# reported type.
0 string/c [autorun]\r\n Microsoft Windows Autorun file.
!:mime application/x-setupscript

43
magic/wordprocessors Normal file
View file

@ -0,0 +1,43 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File: wordprocessors,v 1.16 2012/10/29 17:36:49 christos Exp $
# wordprocessors: file(1) magic for word processors.
#
# Hangul (Korean) Word Processor File
# From: Won-Kyu Park <wkpark@kldp.org>
512 string R\0o\0o\0t\0 Hangul (Korean) Word Processor File 2000
!:mime application/x-hwp
# Quark Express from http://www.garykessler.net/library/file_sigs.html
2 string MMXPR3 Motorola Quark Express Document (English)
!:mime application/x-quark-xpress-3
#------------------------------------------------------------------------------
# ichitaro456: file(1) magic for Just System Word Processor Ichitaro
#
# Contributor kenzo-:
# Reversed-engineered JS Ichitaro magic numbers
#
0 string DOC
>43 byte 0x14 Just System Word Processor Ichitaro v4
!:mime application/x-ichitaro4
0 string DOC
>43 byte 0x15 Just System Word Processor Ichitaro v5
!:mime application/x-ichitaro5
0 string DOC
>43 byte 0x16 Just System Word Processor Ichitaro v6
!:mime application/x-ichitaro6
# Type: Freemind mindmap documents
# From: Jamie Thompson <debian-bugs@jamie-thompson.co.uk>
0 string/w \<map\ version Freemind document
!:mime application/x-freemind
# Type: Scribus
# From: Werner Fink <werner@suse.de>
0 string \<SCRIBUSUTF8NEW\ Version Scribus Document
!:mime application/x-scribus

11
magic/xwindows Normal file
View file

@ -0,0 +1,11 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File: xwindows,v 1.7 2011/05/03 01:44:17 christos Exp $
# xwindows: file(1) magic for various X/Window system file formats.
# Xcursor data
# X11 mouse cursor format defined in libXcursor, see
# http://www.x.org/archive/X11R6.8.1/doc/Xcursor.3.html
# http://cgit.freedesktop.org/xorg/lib/libXcursor/tree/include/X11/Xcursor/Xcursor.h
0 string Xcur Xcursor data
!:mime image/x-xcursor

View file

@ -0,0 +1 @@
@load ./main.bro

View file

@ -0,0 +1,351 @@
##! An interface for driving the analysis of files, possibly independent of
##! any network protocol over which they're transported.
@load base/file_analysis.bif
@load base/frameworks/logging
module FileAnalysis;
export {
redef enum Log::ID += {
## Logging stream for file analysis.
LOG
};
## A structure which represents a desired type of file analysis.
type AnalyzerArgs: record {
## The type of analysis.
tag: Analyzer;
## The local filename to which to write an extracted file. Must be
## set when *tag* is :bro:see:`FileAnalysis::ANALYZER_EXTRACT`.
extract_filename: string &optional;
## An event which will be generated for all new file contents,
## chunk-wise.
chunk_event: event(f: fa_file, data: string, off: count) &optional;
## An event which will be generated for all new file contents,
## stream-wise.
stream_event: event(f: fa_file, data: string) &optional;
} &redef;
## Contains all metadata related to the analysis of a given file.
## For the most part, fields here are derived from ones of the same name
## in :bro:see:`fa_file`.
type Info: record {
## An identifier associated with a single file.
id: string &log;
## Identifier associated with a container file from which this one was
## extracted as part of the file analysis.
parent_id: string &log &optional;
## An identification of the source of the file data. E.g. it may be
## a network protocol over which it was transferred, or a local file
## path which was read, or some other input source.
source: string &log &optional;
## If the source of this file is a network connection, this field
## may be set to indicate the directionality.
is_orig: bool &log &optional;
## The time at which the last activity for the file was seen.
last_active: time &log;
## Number of bytes provided to the file analysis engine for the file.
seen_bytes: count &log &default=0;
## Total number of bytes that are supposed to comprise the full file.
total_bytes: count &log &optional;
## The number of bytes in the file stream that were completely missed
## during the process of analysis e.g. due to dropped packets.
missing_bytes: count &log &default=0;
## The number of not all-in-sequence bytes in the file stream that
## were delivered to file analyzers due to reassembly buffer overflow.
overflow_bytes: count &log &default=0;
## The amount of time between receiving new data for this file that
## the analysis engine will wait before giving up on it.
timeout_interval: interval &log &optional;
## The number of bytes at the beginning of a file to save for later
## inspection in *bof_buffer* field.
bof_buffer_size: count &log &optional;
## A mime type provided by libmagic against the *bof_buffer*, or
## in the cases where no buffering of the beginning of file occurs,
## an initial guess of the mime type based on the first data seen.
mime_type: string &log &optional;
## Whether the file analysis timed out at least once for the file.
timedout: bool &log &default=F;
## Connection UIDS over which the file was transferred.
conn_uids: set[string] &log;
## A set of analysis types done during the file analysis.
analyzers: set[Analyzer] &log;
## Local filenames of extracted files.
extracted_files: set[string] &log;
## An MD5 digest of the file contents.
md5: string &log &optional;
## A SHA1 digest of the file contents.
sha1: string &log &optional;
## A SHA256 digest of the file contents.
sha256: string &log &optional;
} &redef;
## A table that can be used to disable file analysis completely for
## any files transferred over given network protocol analyzers.
const disable: table[AnalyzerTag] of bool = table() &redef;
## Event that can be handled to access the Info record as it is sent on
## to the logging framework.
global log_file_analysis: event(rec: Info);
## The salt concatenated to unique file handle strings generated by
## :bro:see:`get_file_handle` before hashing them in to a file id
## (the *id* field of :bro:see:`fa_file`).
## Provided to help mitigate the possibility of manipulating parts of
## network connections that factor in to the file handle in order to
## generate two handles that would hash to the same file id.
const salt = "I recommend changing this." &redef;
## Sets the *timeout_interval* field of :bro:see:`fa_file`, which is
## used to determine the length of inactivity that is allowed for a file
## before internal state related to it is cleaned up.
##
## f: the file.
##
## t: the amount of time the file can remain inactive before discarding.
##
## Returns: true if the timeout interval was set, or false if analysis
## for the *id* isn't currently active.
global set_timeout_interval: function(f: fa_file, t: interval): bool;
## Postpones the timeout of file analysis for a given file.
## When used within a :bro:see:`file_timeout` handler, the analysis
## will delay timing out for the period of time indicated by
## the *timeout_interval* field of :bro:see:`fa_file`, which can be set
## with :bro:see:`FileAnalysis::set_timeout_interval`.
##
## f: the file.
##
## Returns: true if the timeout will be postponed, or false if analysis
## for the *id* isn't currently active.
global postpone_timeout: function(f: fa_file): bool;
## Adds an analyzer to the analysis of a given file.
##
## f: the file.
##
## args: the analyzer type to add along with any arguments it takes.
##
## Returns: true if the analyzer will be added, or false if analysis
## for the *id* isn't currently active or the *args*
## were invalid for the analyzer type.
global add_analyzer: function(f: fa_file, args: AnalyzerArgs): bool;
## Removes an analyzer from the analysis of a given file.
##
## f: the file.
##
## args: the analyzer (type and args) to remove.
##
## Returns: true if the analyzer will be removed, or false if analysis
## for the *id* isn't currently active.
global remove_analyzer: function(f: fa_file, args: AnalyzerArgs): bool;
## Stops/ignores any further analysis of a given file.
##
## f: the file.
##
## Returns: true if analysis for the given file will be ignored for the
## rest of its contents, or false if analysis for the *id*
## isn't currently active.
global stop: function(f: fa_file): bool;
## Sends a sequential stream of data in for file analysis.
## Meant for use when providing external file analysis input (e.g.
## from the input framework).
##
## source: a string that uniquely identifies the logical file that the
## data is a part of and describes its source.
##
## data: bytestring contents of the file to analyze.
global data_stream: function(source: string, data: string);
## Sends a non-sequential chunk of data in for file analysis.
## Meant for use when providing external file analysis input (e.g.
## from the input framework).
##
## source: a string that uniquely identifies the logical file that the
## data is a part of and describes its source.
##
## data: bytestring contents of the file to analyze.
##
## offset: the offset within the file that this chunk starts.
global data_chunk: function(source: string, data: string, offset: count);
## Signals a content gap in the file bytestream.
## Meant for use when providing external file analysis input (e.g.
## from the input framework).
##
## source: a string that uniquely identifies the logical file that the
## data is a part of and describes its source.
##
## offset: the offset within the file that this gap starts.
##
## len: the number of bytes that are missing.
global gap: function(source: string, offset: count, len: count);
## Signals the total size of a file.
## Meant for use when providing external file analysis input (e.g.
## from the input framework).
##
## source: a string that uniquely identifies the logical file that the
## data is a part of and describes its source.
##
## size: the number of bytes that comprise the full file.
global set_size: function(source: string, size: count);
## Signals the end of a file.
## Meant for use when providing external file analysis input (e.g.
## from the input framework).
##
## source: a string that uniquely identifies the logical file that the
## data is a part of and describes its source.
global eof: function(source: string);
}
# Extend fa_file with an optional FileAnalysis::Info record. It is created and
# refreshed by set_info() and is what the low-priority file_state_remove
# handler in this file hands to Log::write.
redef record fa_file += {
info: Info &optional;
};
function set_info(f: fa_file)
	{
	# Attach an Info record the first time this file is seen.
	if ( ! f?$info )
		{
		local fresh: Info;
		f$info = fresh;
		}

	# Fields that fa_file always carries (mandatory or defaulted).
	f$info$id = f$id;
	f$info$last_active = f$last_active;
	f$info$seen_bytes = f$seen_bytes;
	f$info$missing_bytes = f$missing_bytes;
	f$info$overflow_bytes = f$overflow_bytes;
	f$info$timeout_interval = f$timeout_interval;
	f$info$bof_buffer_size = f$bof_buffer_size;

	# Optional fields are copied only when they are set on the file.
	if ( f?$parent_id )
		f$info$parent_id = f$parent_id;

	if ( f?$source )
		f$info$source = f$source;

	if ( f?$is_orig )
		f$info$is_orig = f$is_orig;

	if ( f?$total_bytes )
		f$info$total_bytes = f$total_bytes;

	if ( f?$mime_type )
		f$info$mime_type = f$mime_type;

	# Record the UID of every connection the file was transferred over.
	if ( f?$conns )
		{
		for ( cid in f$conns )
			add f$info$conn_uids[f$conns[cid]$uid];
		}
	}
function set_timeout_interval(f: fa_file, t: interval): bool
	{
	# Forward to the internal __set_timeout_interval, keyed by the file's id.
	local was_set = __set_timeout_interval(f$id, t);
	return was_set;
	}
function postpone_timeout(f: fa_file): bool
	{
	# Forward to the internal __postpone_timeout, keyed by the file's id.
	local postponed = __postpone_timeout(f$id);
	return postponed;
	}
function add_analyzer(f: fa_file, args: AnalyzerArgs): bool
	{
	# Ask the internal __add_analyzer first; on failure leave f$info untouched.
	local accepted = __add_analyzer(f$id, args);

	if ( ! accepted )
		return F;

	# Make sure f$info exists and is current, then note the analyzer type.
	set_info(f);
	add f$info$analyzers[args$tag];

	# Extraction analyzers also record the file they write to.
	if ( args$tag == FileAnalysis::ANALYZER_EXTRACT )
		add f$info$extracted_files[args$extract_filename];

	return T;
	}
function remove_analyzer(f: fa_file, args: AnalyzerArgs): bool
	{
	# Forward to the internal __remove_analyzer, keyed by the file's id.
	local removed = __remove_analyzer(f$id, args);
	return removed;
	}
function stop(f: fa_file): bool
	{
	# Forward to the internal __stop, keyed by the file's id.
	local stopped = __stop(f$id);
	return stopped;
	}
function data_stream(source: string, data: string)
	{
	# Hand the sequential data for `source` to the internal __data_stream.
	__data_stream(source, data);
	}
function data_chunk(source: string, data: string, offset: count)
	{
	# Hand the chunk and its file offset to the internal __data_chunk.
	__data_chunk(source, data, offset);
	}
function gap(source: string, offset: count, len: count)
	{
	# Report `len` missing bytes starting at `offset` to the internal __gap.
	__gap(source, offset, len);
	}
function set_size(source: string, size: count)
	{
	# Pass the total size of `source` to the internal __set_size.
	__set_size(source, size);
	}
function eof(source: string)
	{
	# Signal the end of `source` to the internal __eof.
	__eof(source);
	}
event bro_init() &priority=5
	{
	# Create the file analysis log stream: Info supplies the columns, and
	# log_file_analysis is the event tied to the stream.
	Log::create_stream(FileAnalysis::LOG, [$columns=Info, $ev=log_file_analysis]);
	}
event file_timeout(f: fa_file) &priority=5
	{
	# Refresh the log record, then flag that this file timed out.
	set_info(f);
	f$info$timedout = T;
	}
event file_hash(f: fa_file, kind: string, hash: string) &priority=5
	{
	set_info(f);

	# Store the digest in the field named after its kind; any other kind
	# is ignored.
	if ( kind == "md5" )
		f$info$md5 = hash;
	else if ( kind == "sha1" )
		f$info$sha1 = hash;
	else if ( kind == "sha256" )
		f$info$sha256 = hash;
	}
event file_state_remove(f: fa_file) &priority=5
	{
	# Final refresh of f$info before the file's state goes away.
	set_info(f);
	}
event file_state_remove(f: fa_file) &priority=-5
	{
	# Write the accumulated Info record to the file analysis log. This
	# handler has the lower priority of the two file_state_remove handlers
	# in this file.
	Log::write(FileAnalysis::LOG, f$info);
	}

View file

@ -2,4 +2,5 @@
@load ./readers/ascii @load ./readers/ascii
@load ./readers/raw @load ./readers/raw
@load ./readers/benchmark @load ./readers/benchmark
@load ./readers/binary

View file

@ -0,0 +1,8 @@
##! Interface for the binary input reader.
module InputBinary;
export {
# NOTE(review): the unit of chunk_size is presumably bytes — confirm against
# the binary reader implementation.
## Size of data chunks to read from the input file at a time.
const chunk_size = 1024 &redef;
}

View file

@ -300,7 +300,7 @@ type connection: record {
## one protocol analyzer is able to parse the same data. If so, all will ## one protocol analyzer is able to parse the same data. If so, all will
## be recorded. Also note that the recorced services are independent of any ## be recorded. Also note that the recorced services are independent of any
## transport-level protocols. ## transport-level protocols.
service: set[string]; service: set[string];
addl: string; ##< Deprecated. addl: string; ##< Deprecated.
hot: count; ##< Deprecated. hot: count; ##< Deprecated.
history: string; ##< State history of connections. See *history* in :bro:see:`Conn::Info`. history: string; ##< State history of connections. See *history* in :bro:see:`Conn::Info`.
@ -316,6 +316,73 @@ type connection: record {
tunnel: EncapsulatingConnVector &optional; tunnel: EncapsulatingConnVector &optional;
}; };
## Default amount of time a file can be inactive before the file analysis
## gives up and discards any internal state related to the file.
const default_file_timeout_interval: interval = 2 mins &redef;
## Default amount of bytes that file analysis will buffer before raising
## :bro:see:`file_new`.
const default_file_bof_buffer_size: count = 1024 &redef;
## A file that Bro is analyzing. This is Bro's type for describing the basic
## internal metadata collected about a "file", which is essentially just a
## byte stream that is e.g. pulled from a network connection or possibly
## some other input source.
type fa_file: record {
## An identifier associated with a single file.
id: string;
## Identifier associated with a container file from which this one was
## extracted as part of the file analysis.
parent_id: string &optional;
## An identification of the source of the file data. E.g. it may be
## a network protocol over which it was transferred, or a local file
## path which was read, or some other input source.
source: string &optional;
## If the source of this file is a network connection, this field
## may be set to indicate the directionality.
is_orig: bool &optional;
## The set of connections over which the file was transferred.
conns: table[conn_id] of connection &optional;
## The time at which the last activity for the file was seen.
last_active: time;
## Number of bytes provided to the file analysis engine for the file.
seen_bytes: count &default=0;
## Total number of bytes that are supposed to comprise the full file.
total_bytes: count &optional;
## The number of bytes in the file stream that were completely missed
## during the process of analysis e.g. due to dropped packets.
missing_bytes: count &default=0;
## The number of not all-in-sequence bytes in the file stream that
## were delivered to file analyzers due to reassembly buffer overflow.
overflow_bytes: count &default=0;
## The amount of time between receiving new data for this file that
## the analysis engine will wait before giving up on it.
timeout_interval: interval &default=default_file_timeout_interval;
## The number of bytes at the beginning of a file to save for later
## inspection in *bof_buffer* field.
bof_buffer_size: count &default=default_file_bof_buffer_size;
## The content of the beginning of a file up to *bof_buffer_size* bytes.
## This is also the buffer that's used for file/mime type detection.
bof_buffer: string &optional;
## A mime type provided by libmagic against the *bof_buffer*, or
## in the cases where no buffering of the beginning of file occurs,
## an initial guess of the mime type based on the first data seen.
mime_type: string &optional;
} &redef;
## Fields of a SYN packet. ## Fields of a SYN packet.
## ##
## .. bro:see:: connection_SYN_packet ## .. bro:see:: connection_SYN_packet
@ -3034,3 +3101,4 @@ const snaplen = 8192 &redef;
@load base/frameworks/input @load base/frameworks/input
@load base/frameworks/file-analysis

View file

@ -1,4 +1,5 @@
@load ./utils-commands @load ./utils-commands
@load ./main @load ./main
@load ./file-analysis
@load ./file-extract @load ./file-extract
@load ./gridftp @load ./gridftp

View file

@ -0,0 +1,47 @@
@load ./main
@load base/utils/conn-ids
@load base/frameworks/file-analysis/main
module FTP;
export {
## Default file handle provider for FTP.
##
## c: The connection to derive a file handle for.
##
## is_orig: Whether the data in question was sent by the originator of
##          the connection.
##
## Returns: A handle string for the file carried by the connection, or
##          an empty string if the connection's responder endpoint is
##          not in the table of expected FTP data channels or the data
##          flows in a direction that is not of interest.
global get_file_handle: function(c: connection, is_orig: bool): string;
}
# Builds the handle string for an FTP data connection out of the FTP data
# analyzer tag, the connection's start time, and the stringified
# connection ID, each separated by a single space.
function get_handle_string(c: connection): string
	{
	local conn_str = id_string(c$id);
	return cat(ANALYZER_FTP_DATA, " ", c$start_time, " ", conn_str);
	}
# Default file handle provider for FTP data connections. Yields an empty
# string for connections that are not expected data channels and for data
# sent by the FTP client; only server-sent data gets a handle.
function get_file_handle(c: connection, is_orig: bool): string
	{
	local resp_h = c$id$resp_h;
	local resp_p = c$id$resp_p;

	if ( [resp_h, resp_p] !in ftp_data_expected )
		return "";

	local info: FTP::Info = ftp_data_expected[resp_h, resp_p];

	# In passive mode the FTP client initiates the data channel, so the
	# server's data is what the responder sends. Otherwise the FTP server
	# initiates the data channel and its data is what the originator sends.
	local from_server = info$passive ? ! is_orig : is_orig;

	if ( ! from_server )
		# Don't care about FTP client data.
		return "";

	# Do care about FTP server data.
	return get_handle_string(c);
	}
module GLOBAL;

# Supplies the file handle for the FTP data analyzer; requests carrying
# any other analyzer tag are ignored by this handler.
event get_file_handle(tag: AnalyzerTag, c: connection, is_orig: bool)
	{
	if ( tag == ANALYZER_FTP_DATA )
		set_file_handle(FTP::get_file_handle(c, is_orig));
	}

View file

@ -13,53 +13,76 @@ export {
const extraction_prefix = "ftp-item" &redef; const extraction_prefix = "ftp-item" &redef;
} }
global extract_count: count = 0;
redef record Info += { redef record Info += {
## On disk file where it was extracted to. ## On disk file where it was extracted to.
extraction_file: file &log &optional; extraction_file: string &log &optional;
## Indicates if the current command/response pair should attempt to ## Indicates if the current command/response pair should attempt to
## extract the file if a file was transferred. ## extract the file if a file was transferred.
extract_file: bool &default=F; extract_file: bool &default=F;
## Internal tracking of the total number of files extracted during this
## session.
num_extracted_files: count &default=0;
}; };
event file_transferred(c: connection, prefix: string, descr: string, function get_extraction_name(f: fa_file): string
mime_type: string) &priority=3
{ {
local id = c$id; local r = fmt("%s-%s-%d.dat", extraction_prefix, f$id, extract_count);
if ( [id$resp_h, id$resp_p] !in ftp_data_expected ) ++extract_count;
return; return r;
}
local s = ftp_data_expected[id$resp_h, id$resp_p]; event file_new(f: fa_file) &priority=5
{
if ( ! f?$source ) return;
if ( f$source != "FTP_DATA" ) return;
if ( extract_file_types in s$mime_type ) if ( f?$mime_type && extract_file_types in f$mime_type )
{ {
s$extract_file = T; FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_EXTRACT,
++s$num_extracted_files; $extract_filename=get_extraction_name(f)]);
return;
}
if ( ! f?$conns ) return;
for ( cid in f$conns )
{
local c: connection = f$conns[cid];
if ( [cid$resp_h, cid$resp_p] !in ftp_data_expected ) next;
local s = ftp_data_expected[cid$resp_h, cid$resp_p];
if ( ! s$extract_file ) next;
FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_EXTRACT,
$extract_filename=get_extraction_name(f)]);
return;
} }
} }
event file_transferred(c: connection, prefix: string, descr: string, event file_state_remove(f: fa_file) &priority=4
mime_type: string) &priority=-4
{ {
local id = c$id; if ( ! f?$source ) return;
if ( [id$resp_h, id$resp_p] !in ftp_data_expected ) if ( f$source != "FTP_DATA" ) return;
return; if ( ! f?$info ) return;
local s = ftp_data_expected[id$resp_h, id$resp_p]; for ( filename in f$info$extracted_files )
if ( s$extract_file )
{ {
local suffix = fmt("%d.dat", s$num_extracted_files); local s: FTP::Info;
local fname = generate_extraction_filename(extraction_prefix, c, suffix); s$ts = network_time();
s$extraction_file = open(fname); s$tags = set();
if ( s$passive ) s$user = "<ftp-data>";
set_contents_file(id, CONTENTS_RESP, s$extraction_file); s$extraction_file = filename;
else
set_contents_file(id, CONTENTS_ORIG, s$extraction_file); if ( f?$conns )
for ( cid in f$conns )
{
s$uid = f$conns[cid]$uid;
s$id = cid;
}
Log::write(FTP::LOG, s);
} }
} }

View file

@ -16,7 +16,8 @@ export {
## List of commands that should have their command/response pairs logged. ## List of commands that should have their command/response pairs logged.
const logged_commands = { const logged_commands = {
"APPE", "DELE", "RETR", "STOR", "STOU", "ACCT" "APPE", "DELE", "RETR", "STOR", "STOU", "ACCT", "PORT", "PASV", "EPRT",
"EPSV"
} &redef; } &redef;
## This setting changes if passwords used in FTP sessions are captured or not. ## This setting changes if passwords used in FTP sessions are captured or not.
@ -25,6 +26,18 @@ export {
## User IDs that can be considered "anonymous". ## User IDs that can be considered "anonymous".
const guest_ids = { "anonymous", "ftp", "ftpuser", "guest" } &redef; const guest_ids = { "anonymous", "ftp", "ftpuser", "guest" } &redef;
## The expected endpoints of an FTP data channel. A record of this type
## describes a data connection that has been announced on the control
## channel but not necessarily seen yet.
type ExpectedDataChannel: record {
## Whether PASV mode is toggled for control channel.
passive: bool &log;
## The host that will be initiating the data connection.
orig_h: addr &log;
## The host that will be accepting the data connection.
resp_h: addr &log;
## The port at which the acceptor is listening for the data connection.
resp_p: port &log;
};
type Info: record { type Info: record {
## Time when the command was sent. ## Time when the command was sent.
ts: time &log; ts: time &log;
@ -43,8 +56,6 @@ export {
## Libmagic "sniffed" file type if the command indicates a file transfer. ## Libmagic "sniffed" file type if the command indicates a file transfer.
mime_type: string &log &optional; mime_type: string &log &optional;
## Libmagic "sniffed" file description if the command indicates a file transfer.
mime_desc: string &log &optional;
## Size of the file if the command indicates a file transfer. ## Size of the file if the command indicates a file transfer.
file_size: count &log &optional; file_size: count &log &optional;
@ -53,7 +64,10 @@ export {
## Reply message from the server in response to the command. ## Reply message from the server in response to the command.
reply_msg: string &log &optional; reply_msg: string &log &optional;
## Arbitrary tags that may indicate a particular attribute of this command. ## Arbitrary tags that may indicate a particular attribute of this command.
tags: set[string] &log &default=set(); tags: set[string] &log;
## Expected FTP data channel.
data_channel: ExpectedDataChannel &log &optional;
## Current working directory that this session is in. By making ## Current working directory that this session is in. By making
## the default value '.', we can indicate that unless something ## the default value '.', we can indicate that unless something
@ -93,6 +107,7 @@ export {
# Add the state tracking information variable to the connection record # Add the state tracking information variable to the connection record
redef record connection += { redef record connection += {
ftp: Info &optional; ftp: Info &optional;
ftp_data_reuse: bool &default=F;
}; };
# Configure DPD # Configure DPD
@ -103,7 +118,7 @@ redef dpd_config += { [ANALYZER_FTP] = [$ports = ports] };
redef likely_server_ports += { 21/tcp, 2811/tcp }; redef likely_server_ports += { 21/tcp, 2811/tcp };
# Establish the variable for tracking expected connections. # Establish the variable for tracking expected connections.
global ftp_data_expected: table[addr, port] of Info &create_expire=5mins; global ftp_data_expected: table[addr, port] of Info &read_expire=5mins;
event bro_init() &priority=5 event bro_init() &priority=5
{ {
@ -194,10 +209,20 @@ function ftp_message(s: Info)
# and may not be used in all commands so they need reset to "blank" # and may not be used in all commands so they need reset to "blank"
# values after logging. # values after logging.
delete s$mime_type; delete s$mime_type;
delete s$mime_desc;
delete s$file_size; delete s$file_size;
# Same with data channel.
delete s$data_channel;
# Tags are cleared everytime too. # Tags are cleared everytime too.
delete s$tags; s$tags = set();
}
function add_expected_data_channel(s: Info, chan: ExpectedDataChannel)
{
s$passive = chan$passive;
s$data_channel = chan;
ftp_data_expected[chan$resp_h, chan$resp_p] = s;
expect_connection(chan$orig_h, chan$resp_h, chan$resp_p, ANALYZER_FTP_DATA,
5mins);
} }
event ftp_request(c: connection, command: string, arg: string) &priority=5 event ftp_request(c: connection, command: string, arg: string) &priority=5
@ -232,9 +257,8 @@ event ftp_request(c: connection, command: string, arg: string) &priority=5
if ( data$valid ) if ( data$valid )
{ {
c$ftp$passive=F; add_expected_data_channel(c$ftp, [$passive=F, $orig_h=id$resp_h,
ftp_data_expected[data$h, data$p] = c$ftp; $resp_h=data$h, $resp_p=data$p]);
expect_connection(id$resp_h, data$h, data$p, ANALYZER_FILE, 5mins);
} }
else else
{ {
@ -283,8 +307,8 @@ event ftp_reply(c: connection, code: count, msg: string, cont_resp: bool) &prior
if ( code == 229 && data$h == [::] ) if ( code == 229 && data$h == [::] )
data$h = c$id$resp_h; data$h = c$id$resp_h;
ftp_data_expected[data$h, data$p] = c$ftp; add_expected_data_channel(c$ftp, [$passive=T, $orig_h=c$id$orig_h,
expect_connection(c$id$orig_h, data$h, data$p, ANALYZER_FILE, 5mins); $resp_h=data$h, $resp_p=data$p]);
} }
else else
{ {
@ -314,7 +338,6 @@ event ftp_reply(c: connection, code: count, msg: string, cont_resp: bool) &prior
} }
} }
event expected_connection_seen(c: connection, a: count) &priority=10 event expected_connection_seen(c: connection, a: count) &priority=10
{ {
local id = c$id; local id = c$id;
@ -330,16 +353,19 @@ event file_transferred(c: connection, prefix: string, descr: string,
{ {
local s = ftp_data_expected[id$resp_h, id$resp_p]; local s = ftp_data_expected[id$resp_h, id$resp_p];
s$mime_type = split1(mime_type, /;/)[1]; s$mime_type = split1(mime_type, /;/)[1];
s$mime_desc = descr;
} }
} }
event file_transferred(c: connection, prefix: string, descr: string, event connection_reused(c: connection) &priority=5
mime_type: string) &priority=-5
{ {
local id = c$id; if ( "ftp-data" in c$service )
if ( [id$resp_h, id$resp_p] in ftp_data_expected ) c$ftp_data_reuse = T;
delete ftp_data_expected[id$resp_h, id$resp_p]; }
event connection_state_remove(c: connection) &priority=-5
{
if ( c$ftp_data_reuse ) return;
delete ftp_data_expected[c$id$resp_h, c$id$resp_p];
} }
# Use state remove event to cover connections terminated by RST. # Use state remove event to cover connections terminated by RST.

View file

@ -1,5 +1,6 @@
@load ./main @load ./main
@load ./utils @load ./utils
@load ./file-analysis
@load ./file-ident @load ./file-ident
@load ./file-hash @load ./file-hash
@load ./file-extract @load ./file-extract

View file

@ -0,0 +1,31 @@
@load ./main
@load ./utils
@load base/utils/conn-ids
@load base/frameworks/file-analysis/main
module HTTP;
export {
## Default file handle provider for HTTP.
##
## c: The connection to derive a file handle for.
##
## is_orig: Direction flag for the data in question; it is embedded
##          in the returned handle.
##
## Returns: A handle string for the file carried by the connection, or
##          an empty string if the connection has no HTTP state
##          attached.
global get_file_handle: function(c: connection, is_orig: bool): string;
}
# Default file handle provider for HTTP. Connections without HTTP state
# yield an empty handle.
function get_file_handle(c: connection, is_orig: bool): string
	{
	if ( ! c?$http )
		return "";

	if ( ! c$http$range_request )
		# Ordinary transfer: the handle is specific to this connection
		# and to the transaction depth within it.
		return cat(ANALYZER_HTTP, " ", c$start_time, " ", is_orig, " ",
		           c$http$trans_depth, " ", id_string(c$id));

	# Range request: the handle is built from the direction, the
	# originating host and the URL only, with no per-connection parts.
	# NOTE(review): presumably so that pieces fetched over several
	# connections map to the same file -- confirm.
	return cat(ANALYZER_HTTP, " ", is_orig, " ", c$id$orig_h, " ",
	           build_url(c$http));
	}
module GLOBAL;

# Supplies the file handle for the HTTP analyzer; requests carrying any
# other analyzer tag are ignored by this handler.
event get_file_handle(tag: AnalyzerTag, c: connection, is_orig: bool)
	{
	if ( tag == ANALYZER_HTTP )
		set_file_handle(HTTP::get_file_handle(c, is_orig));
	}

View file

@ -2,8 +2,7 @@
##! the message body from the server can be extracted with this script. ##! the message body from the server can be extracted with this script.
@load ./main @load ./main
@load ./file-ident @load ./file-analysis
@load base/utils/files
module HTTP; module HTTP;
@ -16,45 +15,70 @@ export {
redef record Info += { redef record Info += {
## On-disk file where the response body was extracted to. ## On-disk file where the response body was extracted to.
extraction_file: file &log &optional; extraction_file: string &log &optional;
## Indicates if the response body is to be extracted or not. Must be ## Indicates if the response body is to be extracted or not. Must be
## set before or by the first :bro:id:`http_entity_data` event for the ## set before or by the first :bro:see:`file_new` for the file content.
## content.
extract_file: bool &default=F; extract_file: bool &default=F;
}; };
} }
event http_entity_data(c: connection, is_orig: bool, length: count, data: string) &priority=-5 global extract_count: count = 0;
function get_extraction_name(f: fa_file): string
{ {
# Client body extraction is not currently supported in this script. local r = fmt("%s-%s-%d.dat", extraction_prefix, f$id, extract_count);
if ( is_orig ) ++extract_count;
return; return r;
}
if ( c$http$first_chunk ) event file_new(f: fa_file) &priority=5
{
if ( ! f?$source ) return;
if ( f$source != "HTTP" ) return;
if ( ! f?$conns ) return;
local fname: string;
local c: connection;
if ( f?$mime_type && extract_file_types in f$mime_type )
{ {
if ( c$http?$mime_type && fname = get_extraction_name(f);
extract_file_types in c$http$mime_type ) FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_EXTRACT,
$extract_filename=fname]);
for ( cid in f$conns )
{ {
c$http$extract_file = T; c = f$conns[cid];
if ( ! c?$http ) next;
c$http$extraction_file = fname;
} }
if ( c$http$extract_file ) return;
{
local suffix = fmt("%s_%d.dat", is_orig ? "orig" : "resp", c$http_state$current_response);
local fname = generate_extraction_filename(extraction_prefix, c, suffix);
c$http$extraction_file = open(fname);
enable_raw_output(c$http$extraction_file);
}
} }
if ( c$http?$extraction_file ) local extracting: bool = F;
print c$http$extraction_file, data;
}
event http_end_entity(c: connection, is_orig: bool) for ( cid in f$conns )
{ {
if ( c$http?$extraction_file ) c = f$conns[cid];
close(c$http$extraction_file);
if ( ! c?$http ) next;
if ( ! c$http$extract_file ) next;
fname = get_extraction_name(f);
FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_EXTRACT,
$extract_filename=fname]);
extracting = T;
break;
}
if ( extracting )
for ( cid in f$conns )
{
c = f$conns[cid];
if ( ! c?$http ) next;
c$http$extraction_file = fname;
}
} }

View file

@ -1,15 +1,11 @@
##! Calculate hashes for HTTP body transfers. ##! Calculate hashes for HTTP body transfers.
@load ./file-ident @load ./main
@load ./file-analysis
module HTTP; module HTTP;
export { export {
redef enum Notice::Type += {
## Indicates that an MD5 sum was calculated for an HTTP response body.
MD5,
};
redef record Info += { redef record Info += {
## MD5 sum for a file transferred over HTTP calculated from the ## MD5 sum for a file transferred over HTTP calculated from the
## response body. ## response body.
@ -19,10 +15,6 @@ export {
## if a file should have an MD5 sum generated. It must be ## if a file should have an MD5 sum generated. It must be
## set to T at the time of or before the first chunk of body data. ## set to T at the time of or before the first chunk of body data.
calc_md5: bool &default=F; calc_md5: bool &default=F;
## Indicates if an MD5 sum is being calculated for the current
## request/response pair.
md5_handle: opaque of md5 &optional;
}; };
## Generate MD5 sums for these filetypes. ## Generate MD5 sums for these filetypes.
@ -31,62 +23,46 @@ export {
&redef; &redef;
} }
## Initialize and calculate the hash. event file_new(f: fa_file) &priority=5
event http_entity_data(c: connection, is_orig: bool, length: count, data: string) &priority=5
{ {
if ( is_orig || ! c?$http ) return; if ( ! f?$source ) return;
if ( f$source != "HTTP" ) return;
if ( c$http$first_chunk ) if ( f?$mime_type && generate_md5 in f$mime_type )
{ {
if ( c$http$calc_md5 || FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_MD5]);
(c$http?$mime_type && generate_md5 in c$http$mime_type) ) return;
{
c$http$md5_handle = md5_hash_init();
}
} }
if ( c$http?$md5_handle ) if ( ! f?$conns ) return;
md5_hash_update(c$http$md5_handle, data);
}
## In the event of a content gap during a file transfer, detect the state for for ( cid in f$conns )
## the MD5 sum calculation and stop calculating the MD5 since it would be
## incorrect anyway.
event content_gap(c: connection, is_orig: bool, seq: count, length: count) &priority=5
{
if ( is_orig || ! c?$http || ! c$http?$md5_handle ) return;
set_state(c, F, is_orig);
md5_hash_finish(c$http$md5_handle); # Ignore return value.
delete c$http$md5_handle;
}
## When the file finishes downloading, finish the hash and generate a notice.
event http_message_done(c: connection, is_orig: bool, stat: http_message_stat) &priority=-3
{
if ( is_orig || ! c?$http ) return;
if ( c$http?$md5_handle )
{ {
local url = build_url_http(c$http); local c: connection = f$conns[cid];
c$http$md5 = md5_hash_finish(c$http$md5_handle);
delete c$http$md5_handle;
NOTICE([$note=MD5, $msg=fmt("%s %s %s", c$id$orig_h, c$http$md5, url), if ( ! c?$http ) next;
$sub=c$http$md5, $conn=c]);
if ( ! c$http$calc_md5 ) next;
FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_MD5]);
return;
} }
} }
event connection_state_remove(c: connection) &priority=-5 event file_state_remove(f: fa_file) &priority=4
{ {
if ( c?$http_state && if ( ! f?$source ) return;
c$http_state$current_response in c$http_state$pending && if ( f$source != "HTTP" ) return;
c$http_state$pending[c$http_state$current_response]?$md5_handle ) if ( ! f?$conns ) return;
if ( ! f?$info ) return;
if ( ! f$info?$md5 ) return;
for ( cid in f$conns )
{ {
# The MD5 sum isn't going to be saved anywhere since the entire local c: connection = f$conns[cid];
# body wouldn't have been seen anyway and we'd just be giving an
# incorrect MD5 sum. if ( ! c?$http ) next;
md5_hash_finish(c$http$md5_handle);
delete c$http$md5_handle; c$http$md5 = f$info$md5;
} }
} }

View file

@ -1,37 +1,28 @@
##! Identification of file types in HTTP response bodies with file content sniffing. ##! Identification of file types in HTTP response bodies with file content sniffing.
@load base/frameworks/signatures
@load base/frameworks/notice @load base/frameworks/notice
@load ./main @load ./main
@load ./utils @load ./utils
@load ./file-analysis
# Add the magic number signatures to the core signature set.
@load-sigs ./file-ident.sig
# Ignore the signatures used to match files
redef Signatures::ignored_ids += /^matchfile-/;
module HTTP; module HTTP;
export { export {
redef enum Notice::Type += { redef enum Notice::Type += {
## Indicates when the file extension doesn't seem to match the file contents. ## Indicates when the file extension doesn't seem to match the file
## contents.
Incorrect_File_Type, Incorrect_File_Type,
}; };
redef record Info += { redef record Info += {
## Mime type of response body identified by content sniffing. ## Mime type of response body identified by content sniffing.
mime_type: string &log &optional; mime_type: string &log &optional;
## Indicates that no data of the current file transfer has been
## seen yet. After the first :bro:id:`http_entity_data` event, it
## will be set to F.
first_chunk: bool &default=T;
}; };
## Mapping between mime types and regular expressions for URLs ## Mapping between mime type strings (without character set) and
## The :bro:enum:`HTTP::Incorrect_File_Type` notice is generated if the pattern ## regular expressions for URLs.
## doesn't match the mime type that was discovered. ## The :bro:enum:`HTTP::Incorrect_File_Type` notice is generated if the
## pattern doesn't match the mime type that was discovered.
const mime_types_extensions: table[string] of pattern = { const mime_types_extensions: table[string] of pattern = {
["application/x-dosexec"] = /\.([eE][xX][eE]|[dD][lL][lL])/, ["application/x-dosexec"] = /\.([eE][xX][eE]|[dD][lL][lL])/,
} &redef; } &redef;
@ -43,43 +34,72 @@ export {
const ignored_incorrect_file_type_urls = /^$/ &redef; const ignored_incorrect_file_type_urls = /^$/ &redef;
} }
event signature_match(state: signature_state, msg: string, data: string) &priority=5 event file_new(f: fa_file) &priority=5
{ {
# Only signatures matching file types are dealt with here. if ( ! f?$source ) return;
if ( /^matchfile-/ !in state$sig_id ) return; if ( f$source != "HTTP" ) return;
if ( ! f?$mime_type ) return;
if ( ! f?$conns ) return;
local c = state$conn; for ( cid in f$conns )
set_state(c, F, F);
# Not much point in any of this if we don't know about the HTTP session.
if ( ! c?$http ) return;
# Set the mime type that was detected.
c$http$mime_type = msg;
if ( msg in mime_types_extensions &&
c$http?$uri && mime_types_extensions[msg] !in c$http$uri )
{ {
local c: connection = f$conns[cid];
if ( ! c?$http ) next;
c$http$mime_type = f$mime_type;
local mime_str: string = c$http$mime_type;
if ( mime_str !in mime_types_extensions ) next;
if ( ! c$http?$uri ) next;
if ( mime_types_extensions[mime_str] in c$http$uri ) next;
local url = build_url_http(c$http); local url = build_url_http(c$http);
if ( url == ignored_incorrect_file_type_urls ) if ( url == ignored_incorrect_file_type_urls ) next;
return;
local message = fmt("%s %s %s", msg, c$http$method, url); local message = fmt("%s %s %s", mime_str, c$http$method, url);
NOTICE([$note=Incorrect_File_Type, NOTICE([$note=Incorrect_File_Type,
$msg=message, $msg=message,
$conn=c]); $conn=c]);
} }
} }
event http_entity_data(c: connection, is_orig: bool, length: count, data: string) &priority=5 event file_over_new_connection(f: fa_file, c: connection) &priority=5
{ {
if ( c$http$first_chunk && ! c$http?$mime_type ) if ( ! f?$source ) return;
c$http$mime_type = split1(identify_data(data, T), /;/)[1]; if ( f$source != "HTTP" ) return;
if ( ! f?$mime_type ) return;
if ( ! c?$http ) return;
# Spread the mime around (e.g. for partial content, file_type event only
# happens once for the first connection, but if there's subsequent
# connections to transfer the same file, they'll be lacking the mime_type
# field if we don't do this).
c$http$mime_type = f$mime_type;
} }
event http_entity_data(c: connection, is_orig: bool, length: count, data: string) &priority=-10 # Tracks byte-range request / partial content response mime types, indexed
# by [connection, uri] pairs. This is needed because a person can pipeline
# byte-range requests over multiple connections to the same uri. Without
# the tracking, only the first request in the pipeline for each connection
# would get a mime_type field assigned to it (by the FileAnalysis policy hooks).
global partial_types: table[conn_id, string] of string &read_expire=5mins;
# Priority 4 so that it runs before the handler that will write to http.log.
event http_message_done(c: connection, is_orig: bool, stat: http_message_stat)
&priority=4
{ {
if ( c$http$first_chunk ) if ( ! c$http$range_request ) return;
c$http$first_chunk=F; if ( ! c$http?$uri ) return;
if ( c$http?$mime_type )
{
partial_types[c$id, c$http$uri] = c$http$mime_type;
return;
}
if ( [c$id, c$http$uri] in partial_types )
c$http$mime_type = partial_types[c$id, c$http$uri];
} }

View file

@ -1,144 +0,0 @@
# These signatures are used as a replacement for libmagic. The signature
# name needs to start with "matchfile" and the "event" directive takes
# the mime type of the file matched by the http-reply-body pattern.
#
# Signatures from: http://www.garykessler.net/library/file_sigs.html
signature matchfile-exe {
http-reply-body /\x4D\x5A/
event "application/x-dosexec"
}
signature matchfile-elf {
http-reply-body /\x7F\x45\x4C\x46/
event "application/x-executable"
}
signature matchfile-script {
# This is meant to match the interpreter declaration at the top of many
# interpreted scripts.
http-reply-body /\#\![[:blank:]]?\//
event "application/x-script"
}
signature matchfile-wmv {
http-reply-body /\x30\x26\xB2\x75\x8E\x66\xCF\x11\xA6\xD9\x00\xAA\x00\x62\xCE\x6C/
event "video/x-ms-wmv"
}
signature matchfile-flv {
http-reply-body /\x46\x4C\x56\x01/
event "video/x-flv"
}
signature matchfile-swf {
http-reply-body /[\x46\x43]\x57\x53/
event "application/x-shockwave-flash"
}
signature matchfile-jar {
http-reply-body /\x5F\x27\xA8\x89/
event "application/java-archive"
}
signature matchfile-class {
http-reply-body /\xCA\xFE\xBA\xBE/
event "application/java-byte-code"
}
signature matchfile-msoffice-2007 {
# MS Office 2007 XML documents
http-reply-body /\x50\x4B\x03\x04\x14\x00\x06\x00/
event "application/msoffice"
}
signature matchfile-msoffice {
# Older MS Office files
http-reply-body /\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1/
event "application/msoffice"
}
signature matchfile-rtf {
http-reply-body /\x7B\x5C\x72\x74\x66\x31/
event "application/rtf"
}
signature matchfile-lnk {
http-reply-body /\x4C\x00\x00\x00\x01\x14\x02\x00\x00\x00\x00\x00\xC0\x00\x00\x00\x00\x00\x00\x46/
event "application/x-ms-shortcut"
}
signature matchfile-torrent {
http-reply-body /\x64\x38\x3A\x61\x6E\x6E\x6F\x75\x6E\x63\x65/
event "application/x-bittorrent"
}
signature matchfile-pdf {
http-reply-body /\x25\x50\x44\x46/
event "application/pdf"
}
signature matchfile-html {
http-reply-body /<[hH][tT][mM][lL]/
event "text/html"
}
signature matchfile-html2 {
http-reply-body /<![dD][oO][cC][tT][yY][pP][eE][[:blank:]][hH][tT][mM][lL]/
event "text/html"
}
signature matchfile-xml {
http-reply-body /<\??[xX][mM][lL]/
event "text/xml"
}
signature matchfile-gif {
http-reply-body /\x47\x49\x46\x38[\x37\x39]\x61/
event "image/gif"
}
signature matchfile-jpg {
http-reply-body /\xFF\xD8\xFF[\xDB\xE0\xE1\xE2\xE3\xE8]..[\x4A\x45\x53][\x46\x78\x50][\x49\x69][\x46\x66]/
event "image/jpeg"
}
signature matchfile-tiff {
http-reply-body /\x4D\x4D\x00[\x2A\x2B]/
event "image/tiff"
}
signature matchfile-png {
http-reply-body /\x89\x50\x4e\x47/
event "image/png"
}
signature matchfile-zip {
http-reply-body /\x50\x4B\x03\x04/
event "application/zip"
}
signature matchfile-bzip {
http-reply-body /\x42\x5A\x68/
event "application/bzip2"
}
signature matchfile-gzip {
http-reply-body /\x1F\x8B\x08/
event "application/x-gzip"
}
signature matchfile-cab {
http-reply-body /\x4D\x53\x43\x46/
event "application/vnd.ms-cab-compressed"
}
signature matchfile-rar {
http-reply-body /\x52\x61\x72\x21\x1A\x07\x00/
event "application/x-rar-compressed"
}
signature matchfile-7z {
http-reply-body /\x37\x7A\xBC\xAF\x27\x1C/
event "application/x-7z-compressed"
}

View file

@ -71,6 +71,10 @@ export {
## All of the headers that may indicate if the request was proxied. ## All of the headers that may indicate if the request was proxied.
proxied: set[string] &log &optional; proxied: set[string] &log &optional;
## Indicates if this request can assume 206 partial content in
## response.
range_request: bool &default=F;
}; };
## Structure to maintain state for an HTTP connection with multiple ## Structure to maintain state for an HTTP connection with multiple
@ -236,6 +240,9 @@ event http_header(c: connection, is_orig: bool, name: string, value: string) &pr
# The split is done to remove the occasional port value that shows up here. # The split is done to remove the occasional port value that shows up here.
c$http$host = split1(value, /:/)[1]; c$http$host = split1(value, /:/)[1];
else if ( name == "RANGE" )
c$http$range_request = T;
else if ( name == "USER-AGENT" ) else if ( name == "USER-AGENT" )
c$http$user_agent = value; c$http$user_agent = value;

View file

@ -1,2 +1,3 @@
@load ./main @load ./main
@load ./dcc-send @load ./dcc-send
@load ./file-analysis

View file

@ -30,67 +30,138 @@ export {
dcc_mime_type: string &log &optional; dcc_mime_type: string &log &optional;
## The file handle for the file to be extracted ## The file handle for the file to be extracted
extraction_file: file &log &optional; extraction_file: string &log &optional;
## A boolean to indicate if the current file transfer should be extracted. ## A boolean to indicate if the current file transfer should be extracted.
extract_file: bool &default=F; extract_file: bool &default=F;
## The count of the number of file that have been extracted during the session.
num_extracted_files: count &default=0;
}; };
} }
global dcc_expected_transfers: table[addr, port] of Info = table(); global dcc_expected_transfers: table[addr, port] of Info &read_expire=5mins;
event file_transferred(c: connection, prefix: string, descr: string, global extract_count: count = 0;
mime_type: string) &priority=3
function set_dcc_mime(f: fa_file)
{ {
local id = c$id; if ( ! f?$conns ) return;
if ( [id$resp_h, id$resp_p] !in dcc_expected_transfers )
return;
local irc = dcc_expected_transfers[id$resp_h, id$resp_p]; for ( cid in f$conns )
irc$dcc_mime_type = split1(mime_type, /;/)[1];
if ( extract_file_types == irc$dcc_mime_type )
{ {
irc$extract_file = T; local c: connection = f$conns[cid];
}
if ( irc$extract_file ) if ( [cid$resp_h, cid$resp_p] !in dcc_expected_transfers ) next;
{
local suffix = fmt("%d.dat", ++irc$num_extracted_files); local s = dcc_expected_transfers[cid$resp_h, cid$resp_p];
local fname = generate_extraction_filename(extraction_prefix, c, suffix);
irc$extraction_file = open(fname); s$dcc_mime_type = f$mime_type;
} }
} }
event file_transferred(c: connection, prefix: string, descr: string, function set_dcc_extraction_file(f: fa_file, filename: string)
mime_type: string) &priority=-4
{ {
local id = c$id; if ( ! f?$conns ) return;
if ( [id$resp_h, id$resp_p] !in dcc_expected_transfers )
for ( cid in f$conns )
{
local c: connection = f$conns[cid];
if ( [cid$resp_h, cid$resp_p] !in dcc_expected_transfers ) next;
local s = dcc_expected_transfers[cid$resp_h, cid$resp_p];
s$extraction_file = filename;
}
}
function get_extraction_name(f: fa_file): string
{
local r = fmt("%s-%s-%d.dat", extraction_prefix, f$id, extract_count);
++extract_count;
return r;
}
# this handler sets the IRC::Info mime type
event file_new(f: fa_file) &priority=5
{
if ( ! f?$source ) return;
if ( f$source != "IRC_DATA" ) return;
if ( ! f?$mime_type ) return;
set_dcc_mime(f);
}
# this handler checks if file extraction is desired
event file_new(f: fa_file) &priority=5
{
if ( ! f?$source ) return;
if ( f$source != "IRC_DATA" ) return;
local fname: string;
if ( f?$mime_type && extract_file_types in f$mime_type )
{
fname = get_extraction_name(f);
FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_EXTRACT,
$extract_filename=fname]);
set_dcc_extraction_file(f, fname);
return; return;
}
local irc = dcc_expected_transfers[id$resp_h, id$resp_p]; if ( ! f?$conns ) return;
local tmp = irc$command; for ( cid in f$conns )
irc$command = "DCC"; {
Log::write(IRC::LOG, irc); local c: connection = f$conns[cid];
irc$command = tmp;
if ( irc?$extraction_file ) if ( [cid$resp_h, cid$resp_p] !in dcc_expected_transfers ) next;
set_contents_file(id, CONTENTS_RESP, irc$extraction_file);
# Delete these values in case another DCC transfer local s = dcc_expected_transfers[cid$resp_h, cid$resp_p];
# happens during the IRC session.
delete irc$extract_file; if ( ! s$extract_file ) next;
delete irc$extraction_file;
delete irc$dcc_file_name; fname = get_extraction_name(f);
delete irc$dcc_file_size; FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_EXTRACT,
delete irc$dcc_mime_type; $extract_filename=fname]);
delete dcc_expected_transfers[id$resp_h, id$resp_p]; s$extraction_file = fname;
return;
}
}
function log_dcc(f: fa_file)
{
if ( ! f?$conns ) return;
for ( cid in f$conns )
{
local c: connection = f$conns[cid];
if ( [cid$resp_h, cid$resp_p] !in dcc_expected_transfers ) next;
local irc = dcc_expected_transfers[cid$resp_h, cid$resp_p];
local tmp = irc$command;
irc$command = "DCC";
Log::write(IRC::LOG, irc);
irc$command = tmp;
# Delete these values in case another DCC transfer
# happens during the IRC session.
delete irc$extract_file;
delete irc$extraction_file;
delete irc$dcc_file_name;
delete irc$dcc_file_size;
delete irc$dcc_mime_type;
return;
}
}
event file_new(f: fa_file) &priority=-5
{
if ( ! f?$source ) return;
if ( f$source != "IRC_DATA" ) return;
log_dcc(f);
} }
event irc_dcc_message(c: connection, is_orig: bool, event irc_dcc_message(c: connection, is_orig: bool,
@ -100,11 +171,11 @@ event irc_dcc_message(c: connection, is_orig: bool,
{ {
set_session(c); set_session(c);
if ( dcc_type != "SEND" ) if ( dcc_type != "SEND" )
return; return;
c$irc$dcc_file_name = argument; c$irc$dcc_file_name = argument;
c$irc$dcc_file_size = size; c$irc$dcc_file_size = size;
local p = count_to_port(dest_port, tcp); local p = count_to_port(dest_port, tcp);
expect_connection(to_addr("0.0.0.0"), address, p, ANALYZER_FILE, 5 min); expect_connection(to_addr("0.0.0.0"), address, p, ANALYZER_IRC_DATA, 5 min);
dcc_expected_transfers[address, p] = c$irc; dcc_expected_transfers[address, p] = c$irc;
} }
@ -114,3 +185,8 @@ event expected_connection_seen(c: connection, a: count) &priority=10
if ( [id$resp_h, id$resp_p] in dcc_expected_transfers ) if ( [id$resp_h, id$resp_p] in dcc_expected_transfers )
add c$service["irc-dcc-data"]; add c$service["irc-dcc-data"];
} }
# Drops the expected-DCC-transfer entry keyed by this connection's
# responder host and port when the connection is removed.
event connection_state_remove(c: connection) &priority=-5
	{
	local cid = c$id;
	delete dcc_expected_transfers[cid$resp_h, cid$resp_p];
	}

View file

@ -0,0 +1,24 @@
@load ./dcc-send.bro
@load base/utils/conn-ids
@load base/frameworks/file-analysis/main
module IRC;
export {
## Default file handle provider for IRC.
global get_file_handle: function(c: connection, is_orig: bool): string;
}
# Builds the file handle string for an IRC data connection.
#
# c: The connection carrying the data.
#
# is_orig: Whether the data was sent by the connection originator.
#
# Returns: An empty string for originator-side data; otherwise a string
#          made of the IRC_DATA analyzer tag, the connection start time
#          and the connection ID string.
function get_file_handle(c: connection, is_orig: bool): string
	{
	if ( ! is_orig )
		return cat(ANALYZER_IRC_DATA, " ", c$start_time, " ", id_string(c$id));

	return "";
	}
module GLOBAL;
# Supplies the file handle for IRC_DATA analyzers; requests for any other
# analyzer tag are ignored by this handler.
event get_file_handle(tag: AnalyzerTag, c: connection, is_orig: bool)
	{
	if ( tag == ANALYZER_IRC_DATA )
		set_file_handle(IRC::get_file_handle(c, is_orig));
	}

View file

@ -1,3 +1,4 @@
@load ./main @load ./main
@load ./entities @load ./entities
@load ./entities-excerpt @load ./entities-excerpt
@load ./file-analysis

View file

@ -9,44 +9,29 @@ export {
redef record SMTP::EntityInfo += { redef record SMTP::EntityInfo += {
## The entity body excerpt. ## The entity body excerpt.
excerpt: string &log &default=""; excerpt: string &log &default="";
## Internal tracking to know how much of the body should be included
## in the excerpt.
excerpt_len: count &optional;
}; };
## This is the default value for how much of the entity body should be ## This is the default value for how much of the entity body should be
## included for all MIME entities. ## included for all MIME entities. The lesser of this value and
## :bro:see:`default_file_bof_buffer_size` will be used.
const default_entity_excerpt_len = 0 &redef; const default_entity_excerpt_len = 0 &redef;
## This table defines how much of various entity bodies should be
## included in excerpts.
const entity_excerpt_len: table[string] of count = {}
&redef
&default = default_entity_excerpt_len;
} }
event mime_segment_data(c: connection, length: count, data: string) &priority=-1 event file_new(f: fa_file) &priority=5
{ {
if ( ! c?$smtp ) return; if ( ! f?$source ) return;
if ( f$source != "SMTP" ) return;
if ( ! f?$bof_buffer ) return;
if ( ! f?$conns ) return;
if ( c$smtp$current_entity$content_len == 0 ) for ( cid in f$conns )
c$smtp$current_entity$excerpt_len = entity_excerpt_len[c$smtp$current_entity$mime_type];
}
event mime_segment_data(c: connection, length: count, data: string) &priority=-2
{
if ( ! c?$smtp ) return;
local ent = c$smtp$current_entity;
if ( ent$content_len < ent$excerpt_len )
{ {
if ( ent$content_len + length < ent$excerpt_len ) local c: connection = f$conns[cid];
ent$excerpt = cat(ent$excerpt, data);
else if ( ! c?$smtp ) next;
{
local x_bytes = ent$excerpt_len - ent$content_len; if ( default_entity_excerpt_len > 0 )
ent$excerpt = cat(ent$excerpt, sub_bytes(data, 1, x_bytes)); c$smtp$current_entity$excerpt =
} f$bof_buffer[0:default_entity_excerpt_len];
} }
} }

View file

@ -7,11 +7,6 @@
module SMTP; module SMTP;
export { export {
redef enum Notice::Type += {
## Indicates that an MD5 sum was calculated for a MIME message.
MD5,
};
redef enum Log::ID += { ENTITIES_LOG }; redef enum Log::ID += { ENTITIES_LOG };
type EntityInfo: record { type EntityInfo: record {
@ -34,15 +29,12 @@ export {
## Optionally calculate the file's MD5 sum. Must be set prior to the ## Optionally calculate the file's MD5 sum. Must be set prior to the
## first data chunk being see in an event. ## first data chunk being see in an event.
calc_md5: bool &default=F; calc_md5: bool &default=F;
## This boolean value indicates if an MD5 sum is being calculated
## for the current file transfer.
md5_handle: opaque of md5 &optional;
## Optionally write the file to disk. Must be set prior to first ## Optionally write the file to disk. Must be set prior to first
## data chunk being seen in an event. ## data chunk being seen in an event.
extract_file: bool &default=F; extract_file: bool &default=F;
## Store the file handle here for the file currently being extracted. ## Store the file handle here for the file currently being extracted.
extraction_file: file &log &optional; extraction_file: string &log &optional;
}; };
redef record Info += { redef record Info += {
@ -51,9 +43,6 @@ export {
}; };
redef record State += { redef record State += {
## Store a count of the number of files that have been transferred in
## a conversation to create unique file names on disk.
num_extracted_files: count &default=0;
## Track the number of MIME encoded files transferred during a session. ## Track the number of MIME encoded files transferred during a session.
mime_level: count &default=0; mime_level: count &default=0;
}; };
@ -77,6 +66,8 @@ export {
global log_mime: event(rec: EntityInfo); global log_mime: event(rec: EntityInfo);
} }
global extract_count: count = 0;
event bro_init() &priority=5 event bro_init() &priority=5
{ {
Log::create_stream(SMTP::ENTITIES_LOG, [$columns=EntityInfo, $ev=log_mime]); Log::create_stream(SMTP::ENTITIES_LOG, [$columns=EntityInfo, $ev=log_mime]);
@ -97,6 +88,13 @@ function set_session(c: connection, new_entity: bool)
} }
} }
# Produces the on-disk name for an extracted file and advances the
# script-wide extraction counter.
#
# f: The file being extracted; its ID becomes part of the name.
#
# Returns: A name of the form "<extraction_prefix>-<file id>-<counter>.dat",
#          using the counter value from before the increment.
function get_extraction_name(f: fa_file): string
	{
	local name = fmt("%s-%s-%d.dat", extraction_prefix, f$id, extract_count);

	++extract_count;

	return name;
	}
event mime_begin_entity(c: connection) &priority=10 event mime_begin_entity(c: connection) &priority=10
{ {
if ( ! c?$smtp ) return; if ( ! c?$smtp ) return;
@ -104,70 +102,114 @@ event mime_begin_entity(c: connection) &priority=10
set_session(c, T); set_session(c, T);
} }
# This has priority -10 because other handlers need to know the current event file_new(f: fa_file) &priority=5
# content_len before it's updated by this handler.
event mime_segment_data(c: connection, length: count, data: string) &priority=-10
{ {
if ( ! c?$smtp ) return; if ( ! f?$source ) return;
if ( f$source != "SMTP" ) return;
if ( ! f?$conns ) return;
c$smtp$current_entity$content_len = c$smtp$current_entity$content_len + length; local fname: string;
} local extracting: bool = F;
event mime_segment_data(c: connection, length: count, data: string) &priority=7 for ( cid in f$conns )
{
if ( ! c?$smtp ) return;
if ( c$smtp$current_entity$content_len == 0 )
c$smtp$current_entity$mime_type = split1(identify_data(data, T), /;/)[1];
}
event mime_segment_data(c: connection, length: count, data: string) &priority=-5
{
if ( ! c?$smtp ) return;
if ( c$smtp$current_entity$content_len == 0 )
{ {
local entity = c$smtp$current_entity; local c: connection = f$conns[cid];
if ( generate_md5 in entity$mime_type && ! never_calc_md5 )
entity$calc_md5 = T;
if ( entity$calc_md5 ) if ( ! c?$smtp ) next;
entity$md5_handle = md5_hash_init(); if ( ! c$smtp?$current_entity ) next;
}
if ( c$smtp$current_entity?$md5_handle ) if ( c$smtp$current_entity$extract_file )
md5_hash_update(entity$md5_handle, data); {
} if ( ! extracting )
{
fname = get_extraction_name(f);
FileAnalysis::add_analyzer(f,
[$tag=FileAnalysis::ANALYZER_EXTRACT,
$extract_filename=fname]);
extracting = T;
++extract_count;
}
## In the event of a content gap during the MIME transfer, detect the state for c$smtp$current_entity$extraction_file = fname;
## the MD5 sum calculation and stop calculating the MD5 since it would be }
## incorrect anyway.
event content_gap(c: connection, is_orig: bool, seq: count, length: count) &priority=5
{
if ( is_orig || ! c?$smtp || ! c$smtp?$current_entity ) return;
local entity = c$smtp$current_entity; if ( c$smtp$current_entity$calc_md5 )
if ( entity?$md5_handle ) FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_MD5]);
{
md5_hash_finish(entity$md5_handle);
delete entity$md5_handle;
} }
} }
event mime_end_entity(c: connection) &priority=-3 function check_extract_by_type(f: fa_file)
{ {
# TODO: this check is only due to a bug in mime_end_entity that if ( extract_file_types !in f$mime_type ) return;
# causes the event to be generated twice for the same real event.
if ( ! c?$smtp || ! c$smtp?$current_entity ) if ( f?$info && FileAnalysis::ANALYZER_EXTRACT in f$info$analyzers )
return; return;
local entity = c$smtp$current_entity; local fname: string = get_extraction_name(f);
if ( entity?$md5_handle ) FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_EXTRACT,
{ $extract_filename=fname]);
entity$md5 = md5_hash_finish(entity$md5_handle);
delete entity$md5_handle;
NOTICE([$note=MD5, $msg=fmt("Calculated a hash for a MIME entity from %s", c$id$orig_h), if ( ! f?$conns ) return;
$sub=entity$md5, $conn=c]);
for ( cid in f$conns )
{
local c: connection = f$conns[cid];
if ( ! c?$smtp ) next;
c$smtp$current_entity$extraction_file = fname;
}
}
# Attaches the MD5 analyzer to *f* when its MIME type matches generate_md5,
# unless MD5 calculation is disabled via never_calc_md5.
#
# f: The file to check.  Its mime_type field is read unconditionally once
#    never_calc_md5 is false.
function check_md5_by_type(f: fa_file)
	{
	if ( never_calc_md5 || generate_md5 !in f$mime_type )
		return;

	FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_MD5]);
	}
# For new SMTP files with a known MIME type: copies the MIME type into the
# current entity of every associated SMTP connection, then applies the
# type-based extraction and MD5 checks.
event file_new(f: fa_file) &priority=5
	{
	if ( ! f?$source || f$source != "SMTP" ) return;
	if ( ! f?$mime_type ) return;

	if ( f?$conns )
		{
		for ( cid in f$conns )
			{
			local c: connection = f$conns[cid];

			# Skip connections without SMTP state or a current entity.
			if ( c?$smtp && c$smtp?$current_entity )
				c$smtp$current_entity$mime_type = f$mime_type;
			}
		}

	check_extract_by_type(f);
	check_md5_by_type(f);
	}
event file_state_remove(f: fa_file) &priority=4
{
if ( ! f?$source ) return;
if ( f$source != "SMTP" ) return;
if ( ! f?$conns ) return;
for ( cid in f$conns )
{
local c: connection = f$conns[cid];
if ( ! c?$smtp ) next;
if ( ! c$smtp?$current_entity ) next;
# Only log if there was some content.
if ( f$seen_bytes == 0 ) next;
if ( f?$info && f$info?$md5 )
c$smtp$current_entity$md5 = f$info$md5;
c$smtp$current_entity$content_len = f$seen_bytes;
Log::write(SMTP::ENTITIES_LOG, c$smtp$current_entity);
delete c$smtp$current_entity;
return;
} }
} }
@ -183,62 +225,3 @@ event mime_one_header(c: connection, h: mime_header_rec)
/[nN][aA][mM][eE][:blank:]*=/ in h$value ) /[nN][aA][mM][eE][:blank:]*=/ in h$value )
c$smtp$current_entity$filename = extract_filename_from_content_disposition(h$value); c$smtp$current_entity$filename = extract_filename_from_content_disposition(h$value);
} }
event mime_end_entity(c: connection) &priority=-5
{
if ( ! c?$smtp ) return;
# This check and the delete below are just to cope with a bug where
# mime_end_entity can be generated multiple times for the same event.
if ( ! c$smtp?$current_entity )
return;
# Only log is there was some content.
if ( c$smtp$current_entity$content_len > 0 )
Log::write(SMTP::ENTITIES_LOG, c$smtp$current_entity);
delete c$smtp$current_entity;
}
event mime_segment_data(c: connection, length: count, data: string) &priority=5
{
if ( ! c?$smtp ) return;
if ( extract_file_types in c$smtp$current_entity$mime_type )
c$smtp$current_entity$extract_file = T;
}
event mime_segment_data(c: connection, length: count, data: string) &priority=3
{
if ( ! c?$smtp ) return;
if ( c$smtp$current_entity$extract_file &&
c$smtp$current_entity$content_len == 0 )
{
local suffix = fmt("%d.dat", ++c$smtp_state$num_extracted_files);
local fname = generate_extraction_filename(extraction_prefix, c, suffix);
c$smtp$current_entity$extraction_file = open(fname);
enable_raw_output(c$smtp$current_entity$extraction_file);
}
}
event mime_segment_data(c: connection, length: count, data: string) &priority=-5
{
if ( ! c?$smtp ) return;
if ( c$smtp$current_entity$extract_file && c$smtp$current_entity?$extraction_file )
print c$smtp$current_entity$extraction_file, data;
}
event mime_end_entity(c: connection) &priority=-3
{
if ( ! c?$smtp ) return;
# TODO: this check is only due to a bug in mime_end_entity that
# causes the event to be generated twice for the same real event.
if ( ! c$smtp?$current_entity )
return;
if ( c$smtp$current_entity?$extraction_file )
close(c$smtp$current_entity$extraction_file);
}

View file

@ -0,0 +1,26 @@
@load ./main
@load ./entities
@load base/utils/conn-ids
@load base/frameworks/file-analysis/main
module SMTP;
export {
## Default file handle provider for SMTP.
global get_file_handle: function(c: connection, is_orig: bool): string;
}
# Builds the file handle string for an SMTP connection.
#
# c: The connection carrying the message.
#
# is_orig: Not consulted by this implementation.
#
# Returns: An empty string if the connection has no SMTP state; otherwise a
#          string made of the SMTP analyzer tag, the connection start time,
#          the transaction depth and the current MIME level.
function get_file_handle(c: connection, is_orig: bool): string
	{
	if ( c?$smtp )
		return cat(ANALYZER_SMTP, " ", c$start_time, " ", c$smtp$trans_depth, " ",
		           c$smtp_state$mime_level);

	return "";
	}
module GLOBAL;
# Supplies the file handle for SMTP analyzers; requests for any other
# analyzer tag are ignored by this handler.
event get_file_handle(tag: AnalyzerTag, c: connection, is_orig: bool)
	{
	if ( tag == ANALYZER_SMTP )
		set_file_handle(SMTP::get_file_handle(c, is_orig));
	}

View file

@ -1,15 +1,31 @@
@load base/frameworks/intel @load base/frameworks/intel
@load base/protocols/smtp/file-analysis
@load base/utils/urls @load base/utils/urls
@load ./where-locations @load ./where-locations
event mime_segment_data(c: connection, length: count, data: string) &priority=3 event intel_mime_data(f: fa_file, data: string)
{ {
local urls = find_all_urls_without_scheme(data); if ( ! f?$conns ) return;
for ( url in urls )
for ( cid in f$conns )
{ {
Intel::seen([$str=url, local c: connection = f$conns[cid];
$str_type=Intel::URL, local urls = find_all_urls_without_scheme(data);
$conn=c, for ( url in urls )
$where=SMTP::IN_MESSAGE]); {
Intel::seen([$str=url,
$str_type=Intel::URL,
$conn=c,
$where=SMTP::IN_MESSAGE]);
}
} }
} }
# Attaches a data-event analyzer to every new SMTP file so that its stream
# content is delivered to intel_mime_data.
event file_new(f: fa_file) &priority=5
	{
	if ( ! f?$source || f$source != "SMTP" ) return;

	FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_DATA_EVENT,
	                               $stream_event=intel_mime_data]);
	}

View file

@ -150,6 +150,10 @@ const Analyzer::Config Analyzer::analyzer_configs[] = {
{ AnalyzerTag::File, "FILE", File_Analyzer::InstantiateAnalyzer, { AnalyzerTag::File, "FILE", File_Analyzer::InstantiateAnalyzer,
File_Analyzer::Available, 0, false }, File_Analyzer::Available, 0, false },
{ AnalyzerTag::IRC_Data, "IRC_DATA", IRC_Data::InstantiateAnalyzer,
IRC_Data::Available, 0, false },
{ AnalyzerTag::FTP_Data, "FTP_DATA", FTP_Data::InstantiateAnalyzer,
FTP_Data::Available, 0, false },
{ AnalyzerTag::Backdoor, "BACKDOOR", { AnalyzerTag::Backdoor, "BACKDOOR",
BackDoor_Analyzer::InstantiateAnalyzer, BackDoor_Analyzer::InstantiateAnalyzer,
BackDoor_Analyzer::Available, 0, false }, BackDoor_Analyzer::Available, 0, false },

View file

@ -41,7 +41,7 @@ namespace AnalyzerTag {
GTPv1, GTPv1,
// Other // Other
File, Backdoor, InterConn, SteppingStone, TCPStats, File, IRC_Data, FTP_Data, Backdoor, InterConn, SteppingStone, TCPStats,
ConnSize, ConnSize,
// Support-analyzers // Support-analyzers

View file

@ -145,6 +145,7 @@ set(BIF_SRCS
logging.bif logging.bif
input.bif input.bif
event.bif event.bif
file_analysis.bif
const.bif const.bif
types.bif types.bif
strings.bif strings.bif
@ -446,6 +447,17 @@ set(bro_SRCS
input/readers/Ascii.cc input/readers/Ascii.cc
input/readers/Raw.cc input/readers/Raw.cc
input/readers/Benchmark.cc input/readers/Benchmark.cc
input/readers/Binary.cc
file_analysis/Manager.cc
file_analysis/File.cc
file_analysis/FileTimer.cc
file_analysis/FileID.h
file_analysis/Analyzer.h
file_analysis/AnalyzerSet.cc
file_analysis/Extract.cc
file_analysis/Hash.cc
file_analysis/DataEvent.cc
nb_dns.c nb_dns.c
digest.h digest.h

View file

@ -16,7 +16,7 @@ DebugLogger::Stream DebugLogger::streams[NUM_DBGS] = {
{ "notifiers", 0, false }, { "main-loop", 0, false }, { "notifiers", 0, false }, { "main-loop", 0, false },
{ "dpd", 0, false }, { "tm", 0, false }, { "dpd", 0, false }, { "tm", 0, false },
{ "logging", 0, false }, {"input", 0, false }, { "logging", 0, false }, {"input", 0, false },
{ "threading", 0, false } { "threading", 0, false }, { "file_analysis", 0, false }
}; };
DebugLogger::DebugLogger(const char* filename) DebugLogger::DebugLogger(const char* filename)

View file

@ -26,6 +26,7 @@ enum DebugStream {
DBG_LOGGING, // Logging streams DBG_LOGGING, // Logging streams
DBG_INPUT, // Input streams DBG_INPUT, // Input streams
DBG_THREADING, // Threading system DBG_THREADING, // Threading system
DBG_FILE_ANALYSIS, // File analysis
NUM_DBGS // Has to be last NUM_DBGS // Has to be last
}; };

View file

@ -110,6 +110,9 @@ void EventMgr::Dispatch()
void EventMgr::Drain() void EventMgr::Drain()
{ {
if ( event_queue_flush_point )
QueueEvent(event_queue_flush_point, new val_list());
SegmentProfiler(segment_logger, "draining-events"); SegmentProfiler(segment_logger, "draining-events");
draining = true; draining = true;

View file

@ -1,19 +1,14 @@
#include <algorithm> #include <algorithm>
#include "file_analysis/Manager.h"
#include "FileAnalyzer.h" #include "FileAnalyzer.h"
#include "Reporter.h" #include "Reporter.h"
#include "util.h" #include "util.h"
magic_t File_Analyzer::magic = 0; File_Analyzer::File_Analyzer(AnalyzerTag::Tag tag, Connection* conn)
magic_t File_Analyzer::magic_mime = 0; : TCP_ApplicationAnalyzer(tag, conn)
File_Analyzer::File_Analyzer(Connection* conn)
: TCP_ApplicationAnalyzer(AnalyzerTag::File, conn)
{ {
buffer_len = 0; buffer_len = 0;
bro_init_magic(&magic, MAGIC_NONE);
bro_init_magic(&magic_mime, MAGIC_MIME);
} }
void File_Analyzer::DeliverStream(int len, const u_char* data, bool orig) void File_Analyzer::DeliverStream(int len, const u_char* data, bool orig)
@ -33,6 +28,11 @@ void File_Analyzer::DeliverStream(int len, const u_char* data, bool orig)
return; return;
} }
// Handles an undelivered stream segment by forwarding it unchanged to the
// TCP_ApplicationAnalyzer implementation; no additional work is done here.
void File_Analyzer::Undelivered(int seq, int len, bool orig)
{
TCP_ApplicationAnalyzer::Undelivered(seq, len, orig);
}
void File_Analyzer::Done() void File_Analyzer::Done()
{ {
TCP_ApplicationAnalyzer::Done(); TCP_ApplicationAnalyzer::Done();
@ -43,19 +43,59 @@ void File_Analyzer::Done()
void File_Analyzer::Identify() void File_Analyzer::Identify()
{ {
const char* descr = 0; const char* desc = bro_magic_buffer(magic_desc_cookie, buffer, buffer_len);
const char* mime = 0; const char* mime = bro_magic_buffer(magic_mime_cookie, buffer, buffer_len);
if ( magic )
descr = bro_magic_buffer(magic, buffer, buffer_len);
if ( magic_mime )
mime = bro_magic_buffer(magic_mime, buffer, buffer_len);
val_list* vl = new val_list; val_list* vl = new val_list;
vl->append(BuildConnVal()); vl->append(BuildConnVal());
vl->append(new StringVal(buffer_len, buffer)); vl->append(new StringVal(buffer_len, buffer));
vl->append(new StringVal(descr ? descr : "<unknown>")); vl->append(new StringVal(desc ? desc : "<unknown>"));
vl->append(new StringVal(mime ? mime : "<unknown>")); vl->append(new StringVal(mime ? mime : "<unknown>"));
ConnectionEvent(file_transferred, vl); ConnectionEvent(file_transferred, vl);
} }
// Constructs a File_Analyzer tagged as AnalyzerTag::IRC_Data for conn.
IRC_Data::IRC_Data(Connection* conn)
: File_Analyzer(AnalyzerTag::IRC_Data, conn)
{
}
// Runs the File_Analyzer teardown first, then signals end-of-file to the
// file analysis manager for this analyzer tag and connection.
void IRC_Data::Done()
{
File_Analyzer::Done();
file_mgr->EndOfFile(GetTag(), Conn());
}
// Passes the stream data to File_Analyzer, then hands the same bytes to
// the file analysis manager together with the tag, connection and direction.
void IRC_Data::DeliverStream(int len, const u_char* data, bool orig)
{
File_Analyzer::DeliverStream(len, data, orig);
file_mgr->DataIn(data, len, GetTag(), Conn(), orig);
}
// Passes the undelivered range to File_Analyzer, then reports it to the
// file analysis manager as a gap with the same seq and len values.
void IRC_Data::Undelivered(int seq, int len, bool orig)
{
File_Analyzer::Undelivered(seq, len, orig);
file_mgr->Gap(seq, len, GetTag(), Conn(), orig);
}
// Constructs a File_Analyzer tagged as AnalyzerTag::FTP_Data for conn.
FTP_Data::FTP_Data(Connection* conn)
: File_Analyzer(AnalyzerTag::FTP_Data, conn)
{
}
// Runs the File_Analyzer teardown first, then signals end-of-file to the
// file analysis manager for this analyzer tag and connection.
void FTP_Data::Done()
{
File_Analyzer::Done();
file_mgr->EndOfFile(GetTag(), Conn());
}
// Passes the stream data to File_Analyzer, then hands the same bytes to
// the file analysis manager together with the tag, connection and direction.
void FTP_Data::DeliverStream(int len, const u_char* data, bool orig)
{
File_Analyzer::DeliverStream(len, data, orig);
file_mgr->DataIn(data, len, GetTag(), Conn(), orig);
}
// Passes the undelivered range to File_Analyzer, then reports it to the
// file analysis manager as a gap with the same seq and len values.
void FTP_Data::Undelivered(int seq, int len, bool orig)
{
File_Analyzer::Undelivered(seq, len, orig);
file_mgr->Gap(seq, len, GetTag(), Conn(), orig);
}

Some files were not shown because too many files have changed in this diff Show more