Merge remote-tracking branch 'origin/topic/jsiwek/file-analysis' into topic/seth/file-analysis-exe-analyzer

Conflicts:
	src/file_analysis/ActionSet.cc
	src/types.bif
This commit is contained in:
Seth Hall 2013-04-24 13:01:39 -04:00
commit d72980828f
141 changed files with 3754 additions and 888 deletions

View file

@ -17,12 +17,17 @@ set(BRO_SCRIPT_SOURCE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/scripts)
get_filename_component(BRO_SCRIPT_INSTALL_PATH ${BRO_SCRIPT_INSTALL_PATH} get_filename_component(BRO_SCRIPT_INSTALL_PATH ${BRO_SCRIPT_INSTALL_PATH}
ABSOLUTE) ABSOLUTE)
set(BRO_MAGIC_INSTALL_PATH ${BRO_ROOT_DIR}/share/bro/magic)
set(BRO_MAGIC_SOURCE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/magic)
configure_file(bro-path-dev.in ${CMAKE_CURRENT_BINARY_DIR}/bro-path-dev) configure_file(bro-path-dev.in ${CMAKE_CURRENT_BINARY_DIR}/bro-path-dev)
file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/bro-path-dev.sh file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/bro-path-dev.sh
"export BROPATH=`${CMAKE_CURRENT_BINARY_DIR}/bro-path-dev`\n" "export BROPATH=`${CMAKE_CURRENT_BINARY_DIR}/bro-path-dev`\n"
"export BROMAGIC=\"${BRO_MAGIC_SOURCE_PATH}\"\n"
"export PATH=\"${CMAKE_CURRENT_BINARY_DIR}/src\":$PATH\n") "export PATH=\"${CMAKE_CURRENT_BINARY_DIR}/src\":$PATH\n")
file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/bro-path-dev.csh file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/bro-path-dev.csh
"setenv BROPATH `${CMAKE_CURRENT_BINARY_DIR}/bro-path-dev`\n" "setenv BROPATH `${CMAKE_CURRENT_BINARY_DIR}/bro-path-dev`\n"
"setenv BROMAGIC \"${BRO_MAGIC_SOURCE_PATH}\"\n"
"setenv PATH \"${CMAKE_CURRENT_BINARY_DIR}/src\":$PATH\n") "setenv PATH \"${CMAKE_CURRENT_BINARY_DIR}/src\":$PATH\n")
file(STRINGS "${CMAKE_CURRENT_SOURCE_DIR}/VERSION" VERSION LIMIT_COUNT 1) file(STRINGS "${CMAKE_CURRENT_SOURCE_DIR}/VERSION" VERSION LIMIT_COUNT 1)
@ -69,6 +74,12 @@ if (MISSING_PREREQS)
message(FATAL_ERROR "Configuration aborted due to missing prerequisites") message(FATAL_ERROR "Configuration aborted due to missing prerequisites")
endif () endif ()
set(libmagic_req 5.04)
if ( LibMagic_VERSION VERSION_LESS ${libmagic_req} )
message(FATAL_ERROR "libmagic of at least version ${libmagic_req} required "
"(found ${LibMagic_VERSION})")
endif ()
include_directories(BEFORE include_directories(BEFORE
${PCAP_INCLUDE_DIR} ${PCAP_INCLUDE_DIR}
${OpenSSL_INCLUDE_DIR} ${OpenSSL_INCLUDE_DIR}
@ -190,6 +201,11 @@ CheckOptionalBuildSources(aux/broctl Broctl INSTALL_BROCTL)
CheckOptionalBuildSources(aux/bro-aux Bro-Aux INSTALL_AUX_TOOLS) CheckOptionalBuildSources(aux/bro-aux Bro-Aux INSTALL_AUX_TOOLS)
CheckOptionalBuildSources(aux/broccoli Broccoli INSTALL_BROCCOLI) CheckOptionalBuildSources(aux/broccoli Broccoli INSTALL_BROCCOLI)
install(DIRECTORY ./magic/ DESTINATION ${BRO_MAGIC_INSTALL_PATH} FILES_MATCHING
PATTERN "COPYING" EXCLUDE
PATTERN "*"
)
######################################################################## ########################################################################
## Packaging Setup ## Packaging Setup

2
cmake

@ -1 +1 @@
Subproject commit 94e72a3075bb0b9550ad05758963afda394bfb2c Subproject commit e1a7fd00a0a66d6831a239fe84f5fcfaa54e2c35

29
magic/COPYING Normal file
View file

@ -0,0 +1,29 @@
# $File: LEGAL.NOTICE,v 1.15 2006/05/03 18:48:33 christos Exp $
# Copyright (c) Ian F. Darwin 1986, 1987, 1989, 1990, 1991, 1992, 1994, 1995.
# Software written by Ian F. Darwin and others;
# maintained 1994- Christos Zoulas.
#
# This software is not subject to any export provision of the United States
# Department of Commerce, and may be exported to any country or planet.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# 1. Redistributions of source code must retain the above copyright
# notice immediately at the beginning of the file, without modification,
# this list of conditions, and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
# SUCH DAMAGE.

208
magic/animation Normal file
View file

@ -0,0 +1,208 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File: animation,v 1.47 2013/02/06 14:18:52 christos Exp $
# animation: file(1) magic for animation/movie formats
#
# animation formats
# MPEG, FLI, DL originally from vax@ccwf.cc.utexas.edu (VaX#n8)
# FLC, SGI, Apple originally from Daniel Quinlan (quinlan@yggdrasil.com)
# SGI and Apple formats
0 string MOVI Silicon Graphics movie file
!:mime video/x-sgi-movie
4 string moov Apple QuickTime
!:mime video/quicktime
4 string mdat Apple QuickTime movie (unoptimized)
!:mime video/quicktime
#4 string wide Apple QuickTime movie (unoptimized)
#!:mime video/quicktime
#4 string skip Apple QuickTime movie (modified)
#!:mime video/quicktime
#4 string free Apple QuickTime movie (modified)
#!:mime video/quicktime
4 string idsc Apple QuickTime image (fast start)
!:mime image/x-quicktime
#4 string idat Apple QuickTime image (unoptimized)
#!:mime image/x-quicktime
4 string pckg Apple QuickTime compressed archive
!:mime application/x-quicktime-player
4 string/W jP JPEG 2000 image
!:mime image/jp2
4 string ftyp ISO Media
>8 string isom \b, MPEG v4 system, version 1
!:mime video/mp4
>8 string mp41 \b, MPEG v4 system, version 1
!:mime video/mp4
>8 string mp42 \b, MPEG v4 system, version 2
!:mime video/mp4
>8 string/W jp2 \b, JPEG 2000
!:mime image/jp2
>8 string 3ge \b, MPEG v4 system, 3GPP
!:mime video/3gpp
>8 string 3gg \b, MPEG v4 system, 3GPP
!:mime video/3gpp
>8 string 3gp \b, MPEG v4 system, 3GPP
!:mime video/3gpp
>8 string 3gs \b, MPEG v4 system, 3GPP
!:mime video/3gpp
>8 string 3g2 \b, MPEG v4 system, 3GPP2
!:mime video/3gpp2
>8 string mmp4 \b, MPEG v4 system, 3GPP Mobile
!:mime video/mp4
>8 string avc1 \b, MPEG v4 system, 3GPP JVT AVC
!:mime video/3gpp
>8 string/W M4A \b, MPEG v4 system, iTunes AAC-LC
!:mime audio/mp4
>8 string/W M4V \b, MPEG v4 system, iTunes AVC-LC
!:mime video/mp4
>8 string/W qt \b, Apple QuickTime movie
!:mime video/quicktime
# MPEG sequences
# Scans for all common MPEG header start codes
0 belong&0xFFFFFF00 0x00000100
>3 byte 0xBA MPEG sequence
!:mime video/mpeg
# GRR too general as it also catches FoxPro Memo example NG.FPT
>3 byte 0xB0 MPEG sequence, v4
!:mime video/mpeg4-generic
>3 byte 0xB5 MPEG sequence, v4
!:mime video/mpeg4-generic
>3 byte 0xB3 MPEG sequence
!:mime video/mpeg
# MPEG ADTS Audio (*.mpx/mxa/aac)
# from dreesen@math.fu-berlin.de
# modified to fully support MPEG ADTS
# MP3, M1A
# modified by Joerg Jenderek
# GRR the original tests are too common for many DOS files
# so don't accept as MP3 until we've tested the rate
0 beshort&0xFFFE 0xFFFA
# rates
>2 byte&0xF0 0x10 MPEG ADTS, layer III, v1, 32 kbps
!:mime audio/mpeg
>2 byte&0xF0 0x20 MPEG ADTS, layer III, v1, 40 kbps
!:mime audio/mpeg
>2 byte&0xF0 0x30 MPEG ADTS, layer III, v1, 48 kbps
!:mime audio/mpeg
>2 byte&0xF0 0x40 MPEG ADTS, layer III, v1, 56 kbps
!:mime audio/mpeg
>2 byte&0xF0 0x50 MPEG ADTS, layer III, v1, 64 kbps
!:mime audio/mpeg
>2 byte&0xF0 0x60 MPEG ADTS, layer III, v1, 80 kbps
!:mime audio/mpeg
>2 byte&0xF0 0x70 MPEG ADTS, layer III, v1, 96 kbps
!:mime audio/mpeg
>2 byte&0xF0 0x80 MPEG ADTS, layer III, v1, 112 kbps
!:mime audio/mpeg
>2 byte&0xF0 0x90 MPEG ADTS, layer III, v1, 128 kbps
!:mime audio/mpeg
>2 byte&0xF0 0xA0 MPEG ADTS, layer III, v1, 160 kbps
!:mime audio/mpeg
>2 byte&0xF0 0xB0 MPEG ADTS, layer III, v1, 192 kbps
!:mime audio/mpeg
>2 byte&0xF0 0xC0 MPEG ADTS, layer III, v1, 224 kbps
!:mime audio/mpeg
>2 byte&0xF0 0xD0 MPEG ADTS, layer III, v1, 256 kbps
!:mime audio/mpeg
>2 byte&0xF0 0xE0 MPEG ADTS, layer III, v1, 320 kbps
!:mime audio/mpeg
# MP2, M1A
0 beshort&0xFFFE 0xFFFC MPEG ADTS, layer II, v1
!:mime audio/mpeg
# MP3, M2A
0 beshort&0xFFFE 0xFFF2 MPEG ADTS, layer III, v2
!:mime audio/mpeg
# MPA, M2A
0 beshort&0xFFFE 0xFFF6 MPEG ADTS, layer I, v2
!:mime audio/mpeg
# MP3, M25A
0 beshort&0xFFFE 0xFFE2 MPEG ADTS, layer III, v2.5
!:mime audio/mpeg
# Stored AAC streams (instead of the MP4 format)
0 string ADIF MPEG ADIF, AAC
!:mime audio/x-hx-aac-adif
# Live or stored single AAC stream (used with MPEG-2 systems)
0 beshort&0xFFF6 0xFFF0 MPEG ADTS, AAC
!:mime audio/x-hx-aac-adts
# Live MPEG-4 audio streams (instead of RTP FlexMux)
0 beshort&0xFFE0 0x56E0 MPEG-4 LOAS
!:mime audio/x-mp4a-latm
# This magic isn't strong enough (matches plausible ISO-8859-1 text)
#0 beshort 0x4DE1 MPEG-4 LO-EP audio stream
#!:mime audio/x-mp4a-latm
# Summary: FLI animation format
# Created by: Daniel Quinlan <quinlan@yggdrasil.com>
# Modified by (1): Abel Cheung <abelcheung@gmail.com> (avoid over-generic detection)
4 leshort 0xAF11
# standard FLI always has 320x200 resolution and 8 bit color
>8 leshort 320
>>10 leshort 200
>>>12 leshort 8 FLI animation, 320x200x8
!:mime video/x-fli
# Summary: FLC animation format
# Created by: Daniel Quinlan <quinlan@yggdrasil.com>
# Modified by (1): Abel Cheung <abelcheung@gmail.com> (avoid over-generic detection)
4 leshort 0xAF12
# standard FLC always use 8 bit color
>12 leshort 8 FLC animation
!:mime video/x-flc
# Microsoft Advanced Streaming Format (ASF) <mpruett@sgi.com>
0 belong 0x3026b275 Microsoft ASF
!:mime video/x-ms-asf
# MNG Video Format, <URL:http://www.libpng.org/pub/mng/spec/>
0 string \x8aMNG MNG video data,
!:mime video/x-mng
# JNG Video Format, <URL:http://www.libpng.org/pub/mng/spec/>
0 string \x8bJNG JNG video data,
!:mime video/x-jng
# VRML (Virtual Reality Modelling Language)
0 string/w #VRML\ V1.0\ ascii VRML 1 file
!:mime model/vrml
0 string/w #VRML\ V2.0\ utf8 ISO/IEC 14772 VRML 97 file
!:mime model/vrml
# X3D (Extensible 3D) [http://www.web3d.org/specifications/x3d-3.0.dtd]
# From Michel Briand <michelbriand@free.fr>
0 string/t \<?xml\ version="
!:strength +1
>20 search/1000/cw \<!DOCTYPE\ X3D X3D (Extensible 3D) model xml text
!:mime model/x3d
# MPEG file
# MPEG sequences
# FIXME: This section is from the old magic.mime file and needs integrating with the rest
0 belong 0x000001BA
>4 byte &0x40
!:mime video/mp2p
>4 byte ^0x40
!:mime video/mpeg
0 belong 0x000001BB
!:mime video/mpeg
0 belong 0x000001B0
!:mime video/mp4v-es
0 belong 0x000001B5
!:mime video/mp4v-es
0 belong 0x000001B3
!:mime video/mpv
0 belong&0xFF5FFF1F 0x47400010
!:mime video/mp2t
0 belong 0x00000001
>4 byte&0x1F 0x07
!:mime video/h264

242
magic/archive Normal file
View file

@ -0,0 +1,242 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File: archive,v 1.78 2013/02/06 14:18:52 christos Exp $
# archive: file(1) magic for archive formats (see also "msdos" for self-
# extracting compressed archives)
#
# cpio, ar, arc, arj, hpack, lha/lharc, rar, squish, uc2, zip, zoo, etc.
# pre-POSIX "tar" archives are handled in the C code.
# POSIX tar archives
257 string ustar\0 POSIX tar archive
!:mime application/x-tar # encoding: posix
257 string ustar\040\040\0 GNU tar archive
!:mime application/x-tar # encoding: gnu
# cpio archives
#
# Yes, the top two "cpio archive" formats *are* supposed to just be "short".
# The idea is to indicate archives produced on machines with the same
# byte order as the machine running "file" with "cpio archive", and
# to indicate archives produced on machines with the opposite byte order
# from the machine running "file" with "byte-swapped cpio archive".
#
# The SVR4 "cpio(4)" hints that there are additional formats, but they
# are defined as "short"s; I think all the new formats are
# character-header formats and thus are strings, not numbers.
0 short 070707 cpio archive
!:mime application/x-cpio
0 short 0143561 byte-swapped cpio archive
!:mime application/x-cpio # encoding: swapped
#
# System V Release 1 portable(?) archive format.
#
0 string =<ar> System V Release 1 ar archive
!:mime application/x-archive
#
# Debian package; it's in the portable archive format, and needs to go
# before the entry for regular portable archives, as it's recognized as
# a portable archive whose first member has a name beginning with
# "debian".
#
0 string =!<arch>\ndebian
!:mime application/x-debian-package
#
# MIPS archive; they're in the portable archive format, and need to go
# before the entry for regular portable archives, as it's recognized as
# a portable archive whose first member has a name beginning with
# "__________E".
#
0 string =!<arch>\n__________E MIPS archive
!:mime application/x-archive
#
# BSD/SVR2-and-later portable archive formats.
#
0 string =!<arch> current ar archive
!:mime application/x-archive
# ARC archiver, from Daniel Quinlan (quinlan@yggdrasil.com)
#
# The first byte is the magic (0x1a), byte 2 is the compression type for
# the first file (0x01 through 0x09), and bytes 3 to 15 are the MS-DOS
# filename of the first file (null terminated). Since some types collide
# we only test some types on basis of frequency: 0x08 (83%), 0x09 (5%),
# 0x02 (5%), 0x03 (3%), 0x04 (2%), 0x06 (2%). 0x01 collides with terminfo.
0 lelong&0x8080ffff 0x0000081a ARC archive data, dynamic LZW
!:mime application/x-arc
0 lelong&0x8080ffff 0x0000091a ARC archive data, squashed
!:mime application/x-arc
0 lelong&0x8080ffff 0x0000021a ARC archive data, uncompressed
!:mime application/x-arc
0 lelong&0x8080ffff 0x0000031a ARC archive data, packed
!:mime application/x-arc
0 lelong&0x8080ffff 0x0000041a ARC archive data, squeezed
!:mime application/x-arc
0 lelong&0x8080ffff 0x0000061a ARC archive data, crunched
!:mime application/x-arc
# [JW] stuff taken from idarc, obviously ARC successors:
0 lelong&0x8080ffff 0x00000a1a PAK archive data
!:mime application/x-arc
0 lelong&0x8080ffff 0x0000141a ARC+ archive data
!:mime application/x-arc
0 lelong&0x8080ffff 0x0000481a HYP archive data
!:mime application/x-arc
# ARJ archiver (jason@jarthur.Claremont.EDU)
0 leshort 0xea60 ARJ archive data
!:mime application/x-arj
# LHARC/LHA archiver (Greg Roelofs, newt@uchicago.edu)
2 string -lh0- LHarc 1.x/ARX archive data [lh0]
!:mime application/x-lharc
2 string -lh1- LHarc 1.x/ARX archive data [lh1]
!:mime application/x-lharc
2 string -lz4- LHarc 1.x archive data [lz4]
!:mime application/x-lharc
2 string -lz5- LHarc 1.x archive data [lz5]
!:mime application/x-lharc
# [never seen any but the last; -lh4- reported in comp.compression:]
2 string -lzs- LHa/LZS archive data [lzs]
!:mime application/x-lha
2 string -lh\40- LHa 2.x? archive data [lh ]
!:mime application/x-lha
2 string -lhd- LHa 2.x? archive data [lhd]
!:mime application/x-lha
2 string -lh2- LHa 2.x? archive data [lh2]
!:mime application/x-lha
2 string -lh3- LHa 2.x? archive data [lh3]
!:mime application/x-lha
2 string -lh4- LHa (2.x) archive data [lh4]
!:mime application/x-lha
2 string -lh5- LHa (2.x) archive data [lh5]
!:mime application/x-lha
2 string -lh6- LHa (2.x) archive data [lh6]
!:mime application/x-lha
2 string -lh7- LHa (2.x)/LHark archive data [lh7]
!:mime application/x-lha
# RAR archiver (Greg Roelofs, newt@uchicago.edu)
0 string Rar! RAR archive data,
!:mime application/x-rar
# PKZIP multi-volume archive
0 string PK\x07\x08PK\x03\x04 Zip multi-volume archive data, at least PKZIP v2.50 to extract
!:mime application/zip
# Zip archives (Greg Roelofs, c/o zip-bugs@wkuvx1.wku.edu)
0 string PK\003\004
# Specialised zip formats which start with a member named 'mimetype'
# (stored uncompressed, with no 'extra field') containing the file's MIME type.
# Check for an 8-byte name, a 0-byte extra field, the name "mimetype", and
# contents starting with "application/":
>26 string \x8\0\0\0mimetypeapplication/
# OpenDocument formats (for OpenOffice 2.x / StarOffice >= 8)
# http://lists.oasis-open.org/archives/office/200505/msg00006.html
# (mimetype contains "application/vnd.oasis.opendocument.<SUBTYPE>")
>>50 string vnd.oasis.opendocument. OpenDocument
>>>73 string text
>>>>77 byte !0x2d Text
!:mime application/vnd.oasis.opendocument.text
>>>>77 string -template Text Template
!:mime application/vnd.oasis.opendocument.text-template
>>>>77 string -web HTML Document Template
!:mime application/vnd.oasis.opendocument.text-web
>>>>77 string -master Master Document
!:mime application/vnd.oasis.opendocument.text-master
>>>73 string graphics
>>>>81 byte !0x2d Drawing
!:mime application/vnd.oasis.opendocument.graphics
>>>>81 string -template Template
!:mime application/vnd.oasis.opendocument.graphics-template
>>>73 string presentation
>>>>85 byte !0x2d Presentation
!:mime application/vnd.oasis.opendocument.presentation
>>>>85 string -template Template
!:mime application/vnd.oasis.opendocument.presentation-template
>>>73 string spreadsheet
>>>>84 byte !0x2d Spreadsheet
!:mime application/vnd.oasis.opendocument.spreadsheet
>>>>84 string -template Template
!:mime application/vnd.oasis.opendocument.spreadsheet-template
>>>73 string chart
>>>>78 byte !0x2d Chart
!:mime application/vnd.oasis.opendocument.chart
>>>>78 string -template Template
!:mime application/vnd.oasis.opendocument.chart-template
>>>73 string formula
>>>>80 byte !0x2d Formula
!:mime application/vnd.oasis.opendocument.formula
>>>>80 string -template Template
!:mime application/vnd.oasis.opendocument.formula-template
>>>73 string database Database
!:mime application/vnd.oasis.opendocument.database
>>>73 string image
>>>>78 byte !0x2d Image
!:mime application/vnd.oasis.opendocument.image
>>>>78 string -template Template
!:mime application/vnd.oasis.opendocument.image-template
# EPUB (OEBPS) books using OCF (OEBPS Container Format)
# http://www.idpf.org/ocf/ocf1.0/download/ocf10.htm, section 4.
# From: Ralf Brown <ralf.brown@gmail.com>
>0x1E string mimetypeapplication/epub+zip EPUB document
!:mime application/epub+zip
# Catch other ZIP-with-mimetype formats
# In a ZIP file, the bytes immediately after a member's contents are
# always "PK". The 2 regex rules here print the "mimetype" member's
# contents up to the first 'P'. Luckily, most MIME types don't contain
# any capital 'P's. This is a kludge.
# (mimetype contains "application/<OTHER>")
>>50 string !epub+zip
>>>50 string !vnd.oasis.opendocument.
>>>>50 string !vnd.sun.xml.
>>>>>50 string !vnd.kde.
>>>>>>38 regex [!-OQ-~]+ Zip data (MIME type "%s"?)
!:mime application/zip
# (mimetype contents other than "application/*")
>26 string \x8\0\0\0mimetype
>>38 string !application/
>>>38 regex [!-OQ-~]+ Zip data (MIME type "%s"?)
!:mime application/zip
# Java Jar files
>(26.s+30) leshort 0xcafe Java Jar file data (zip)
!:mime application/jar
# Generic zip archives (Greg Roelofs, c/o zip-bugs@wkuvx1.wku.edu)
# Next line excludes specialized formats:
>(26.s+30) leshort !0xcafe
>>26 string !\x8\0\0\0mimetype Zip archive data
!:mime application/zip
# Zoo archiver
20 lelong 0xfdc4a7dc Zoo archive data
!:mime application/x-zoo
# Shell archives
10 string #\ This\ is\ a\ shell\ archive shell archive text
!:mime application/octet-stream
# Felix von Leitner <felix-file@fefe.de>
0 string d8:announce BitTorrent file
!:mime application/x-bittorrent
# EET archive
# From: Tilman Sauerbeck <tilman@code-monkey.de>
0 belong 0x1ee7ff00 EET archive
!:mime application/x-eet
# Symbian installation files
# http://www.thouky.co.uk/software/psifs/sis.html
# http://developer.symbian.com/main/downloads/papers/SymbianOSv91/softwareinstallsis.pdf
8 lelong 0x10000419 Symbian installation file
!:mime application/vnd.symbian.install
0 lelong 0x10201A7A Symbian installation file (Symbian OS 9.x)
!:mime x-epoc/x-sisx-app

19
magic/assembler Normal file
View file

@ -0,0 +1,19 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File: assembler,v 1.3 2013/01/04 17:23:28 christos Exp $
# assembler: file(1) magic for assembler source
#
0 regex \^[\020\t]*\\.asciiz assembler source text
!:mime text/x-asm
0 regex \^[\020\t]*\\.byte assembler source text
!:mime text/x-asm
0 regex \^[\020\t]*\\.even assembler source text
!:mime text/x-asm
0 regex \^[\020\t]*\\.globl assembler source text
!:mime text/x-asm
0 regex \^[\020\t]*\\.text assembler source text
!:mime text/x-asm
0 regex \^[\020\t]*\\.file assembler source text
!:mime text/x-asm
0 regex \^[\020\t]*\\.type assembler source text
!:mime text/x-asm

149
magic/audio Normal file
View file

@ -0,0 +1,149 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File: audio,v 1.65 2012/10/31 13:38:40 christos Exp $
# audio: file(1) magic for sound formats (see also "iff")
#
# Jan Nicolai Langfeldt (janl@ifi.uio.no), Dan Quinlan (quinlan@yggdrasil.com),
# and others
#
# Sun/NeXT audio data
0 string .snd Sun/NeXT audio data:
>12 belong 1 8-bit ISDN mu-law,
!:mime audio/basic
>12 belong 2 8-bit linear PCM [REF-PCM],
!:mime audio/basic
>12 belong 3 16-bit linear PCM,
!:mime audio/basic
>12 belong 4 24-bit linear PCM,
!:mime audio/basic
>12 belong 5 32-bit linear PCM,
!:mime audio/basic
>12 belong 6 32-bit IEEE floating point,
!:mime audio/basic
>12 belong 7 64-bit IEEE floating point,
!:mime audio/basic
>12 belong 23 8-bit ISDN mu-law compressed (CCITT G.721 ADPCM voice enc.),
!:mime audio/x-adpcm
# DEC systems (e.g. DECstation 5000) use a variant of the Sun/NeXT format
# that uses little-endian encoding and has a different magic number
0 lelong 0x0064732E DEC audio data:
>12 lelong 1 8-bit ISDN mu-law,
!:mime audio/x-dec-basic
>12 lelong 2 8-bit linear PCM [REF-PCM],
!:mime audio/x-dec-basic
>12 lelong 3 16-bit linear PCM,
!:mime audio/x-dec-basic
>12 lelong 4 24-bit linear PCM,
!:mime audio/x-dec-basic
>12 lelong 5 32-bit linear PCM,
!:mime audio/x-dec-basic
>12 lelong 6 32-bit IEEE floating point,
!:mime audio/x-dec-basic
>12 lelong 7 64-bit IEEE floating point,
!:mime audio/x-dec-basic
>12 lelong 23 8-bit ISDN mu-law compressed (CCITT G.721 ADPCM voice enc.),
!:mime audio/x-dec-basic
# Creative Labs AUDIO stuff
0 string MThd Standard MIDI data
!:mime audio/midi
0 string CTMF Creative Music (CMF) data
!:mime audio/x-unknown
0 string SBI SoundBlaster instrument data
!:mime audio/x-unknown
0 string Creative\ Voice\ File Creative Labs voice data
!:mime audio/x-unknown
# Real Audio (Magic .ra\0375)
0 belong 0x2e7261fd RealAudio sound file
!:mime audio/x-pn-realaudio
0 string .RMF\0\0\0 RealMedia file
!:mime application/vnd.rn-realmedia
# mime types according to http://www.geocities.com/nevilo/mod.htm:
# audio/it .it
# audio/x-zipped-it .itz
# audio/xm fasttracker modules
# audio/x-s3m screamtracker modules
# audio/s3m screamtracker modules
# audio/x-zipped-mod mdz
# audio/mod mod
# audio/x-mod All modules (mod, s3m, 669, mtm, med, xm, it, mdz, stm, itz, xmz, s3z)
#
# Taken from loader code from mikmod version 2.14
# by Steve McIntyre (stevem@chiark.greenend.org.uk)
# <doj@cubic.org> added title printing on 2003-06-24
0 string MAS_UTrack_V00
>14 string >/0 ultratracker V1.%.1s module sound data
!:mime audio/x-mod
#audio/x-tracker-module
0 string Extended\ Module: Fasttracker II module sound data
!:mime audio/x-mod
#audio/x-tracker-module
21 string/c =!SCREAM! Screamtracker 2 module sound data
!:mime audio/x-mod
#audio/x-screamtracker-module
21 string BMOD2STM Screamtracker 2 module sound data
!:mime audio/x-mod
#audio/x-screamtracker-module
1080 string M.K. 4-channel Protracker module sound data
!:mime audio/x-mod
#audio/x-protracker-module
1080 string M!K! 4-channel Protracker module sound data
!:mime audio/x-mod
#audio/x-protracker-module
1080 string FLT4 4-channel Startracker module sound data
!:mime audio/x-mod
#audio/x-startracker-module
1080 string FLT8 8-channel Startracker module sound data
!:mime audio/x-mod
#audio/x-startracker-module
1080 string 4CHN 4-channel Fasttracker module sound data
!:mime audio/x-mod
#audio/x-fasttracker-module
1080 string 6CHN 6-channel Fasttracker module sound data
!:mime audio/x-mod
#audio/x-fasttracker-module
1080 string 8CHN 8-channel Fasttracker module sound data
!:mime audio/x-mod
#audio/x-fasttracker-module
1080 string CD81 8-channel Octalyser module sound data
!:mime audio/x-mod
#audio/x-octalysertracker-module
1080 string OKTA 8-channel Octalyzer module sound data
!:mime audio/x-mod
#audio/x-octalysertracker-module
# Not good enough.
#1082 string CH
#>1080 string >/0 %.2s-channel Fasttracker "oktalyzer" module sound data
1080 string 16CN 16-channel Taketracker module sound data
!:mime audio/x-mod
#audio/x-taketracker-module
1080 string 32CN 32-channel Taketracker module sound data
!:mime audio/x-mod
#audio/x-taketracker-module
# Impulse tracker module (audio/x-it)
0 string IMPM Impulse Tracker module sound data -
!:mime audio/x-mod
# Free lossless audio codec <http://flac.sourceforge.net>
# From: Przemyslaw Augustyniak <silvathraec@rpg.pl>
0 string fLaC FLAC audio bitstream data
!:mime audio/x-flac
# Monkey's Audio compressed audio format (.ape)
# From danny.milo@gmx.net (Danny Milosavljevic)
# New version from Abel Cheung <abel (@) oaka.org>
0 string MAC\040 Monkey's Audio compressed format
!:mime audio/x-ape
# Musepack support. From: "Jiri Pejchal" <jiri.pejchal@gmail.com>
0 string MP+ Musepack audio
!:mime audio/x-musepack

47
magic/c-lang Normal file
View file

@ -0,0 +1,47 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File: c-lang,v 1.16 2011/12/09 08:02:16 rrt Exp $
# c-lang: file(1) magic for C and related languages programs
#
# BCPL
0 search/8192 "libhdr" BCPL source text
!:mime text/x-bcpl
0 search/8192 "LIBHDR" BCPL source text
!:mime text/x-bcpl
# C
0 regex \^#include C source text
!:mime text/x-c
0 regex \^char C source text
!:mime text/x-c
0 regex \^double C source text
!:mime text/x-c
0 regex \^extern C source text
!:mime text/x-c
0 regex \^float C source text
!:mime text/x-c
0 regex \^struct C source text
!:mime text/x-c
0 regex \^union C source text
!:mime text/x-c
0 search/8192 main( C source text
!:mime text/x-c
# C++
# The strength of these rules is increased so they beat the C rules above
0 regex \^template C++ source text
!:strength + 5
!:mime text/x-c++
0 regex \^virtual C++ source text
!:strength + 5
!:mime text/x-c++
0 regex \^class C++ source text
!:strength + 5
!:mime text/x-c++
0 regex \^public: C++ source text
!:strength + 5
!:mime text/x-c++
0 regex \^private: C++ source text
!:strength + 5
!:mime text/x-c++

31
magic/cafebabe Normal file
View file

@ -0,0 +1,31 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File: cafebabe,v 1.13 2013/02/26 21:04:38 christos Exp $
# Cafe Babes unite!
#
# Since Java bytecode and Mach-O universal binaries have the same magic number,
# the test must be performed in the same "magic" sequence to get both right.
# The long at offset 4 in a Mach-O universal binary tells the number of
# architectures; the short at offset 4 in a Java bytecode file is the JVM minor
# version and the short at offset 6 is the JVM major version. Since there are only
# 18 labeled Mach-O architectures at present, and the first released
# Java class format was version 43.0, we can safely choose any number
# between 18 and 39 to test the number of architectures against
# (and use as a hack). Let's not use 18, because the Mach-O people
# might add another one or two as time goes by...
#
### JAVA START ###
0 belong 0xcafebabe
!:mime application/x-java-applet
0 belong 0xcafed00d JAR compressed with pack200,
>5 byte x version %d.
>4 byte x \b%d
!:mime application/x-java-pack200
0 belong 0xcafed00d JAR compressed with pack200,
>5 byte x version %d.
>4 byte x \b%d
!:mime application/x-java-pack200
### JAVA END ###

82
magic/commands Normal file
View file

@ -0,0 +1,82 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File: commands,v 1.44 2013/02/05 15:20:47 christos Exp $
# commands: file(1) magic for various shells and interpreters
#
#0 string/w : shell archive or script for antique kernel text
0 string/wt #!\ /bin/sh POSIX shell script text executable
!:mime text/x-shellscript
0 string/wt #!\ /bin/csh C shell script text executable
!:mime text/x-shellscript
# korn shell magic, sent by George Wu, gwu@clyde.att.com
0 string/wt #!\ /bin/ksh Korn shell script text executable
!:mime text/x-shellscript
0 string/wt #!\ /bin/tcsh Tenex C shell script text executable
!:mime text/x-shellscript
0 string/wt #!\ /usr/bin/tcsh Tenex C shell script text executable
!:mime text/x-shellscript
0 string/wt #!\ /usr/local/tcsh Tenex C shell script text executable
!:mime text/x-shellscript
0 string/wt #!\ /usr/local/bin/tcsh Tenex C shell script text executable
!:mime text/x-shellscript
#
# zsh/ash/ae/nawk/gawk magic from cameron@cs.unsw.oz.au (Cameron Simpson)
0 string/wt #!\ /bin/zsh Paul Falstad's zsh script text executable
!:mime text/x-shellscript
0 string/wt #!\ /usr/bin/zsh Paul Falstad's zsh script text executable
!:mime text/x-shellscript
0 string/wt #!\ /usr/local/bin/zsh Paul Falstad's zsh script text executable
!:mime text/x-shellscript
0 string/wt #!\ /usr/local/bin/ash Neil Brown's ash script text executable
!:mime text/x-shellscript
0 string/wt #!\ /usr/local/bin/ae Neil Brown's ae script text executable
!:mime text/x-shellscript
0 string/wt #!\ /bin/nawk new awk script text executable
!:mime text/x-nawk
0 string/wt #!\ /usr/bin/nawk new awk script text executable
!:mime text/x-nawk
0 string/wt #!\ /usr/local/bin/nawk new awk script text executable
!:mime text/x-nawk
0 string/wt #!\ /bin/gawk GNU awk script text executable
!:mime text/x-gawk
0 string/wt #!\ /usr/bin/gawk GNU awk script text executable
!:mime text/x-gawk
0 string/wt #!\ /usr/local/bin/gawk GNU awk script text executable
!:mime text/x-gawk
#
0 string/wt #!\ /bin/awk awk script text executable
!:mime text/x-awk
0 string/wt #!\ /usr/bin/awk awk script text executable
!:mime text/x-awk
# bash shell magic, from Peter Tobias (tobias@server.et-inf.fho-emden.de)
0 string/wt #!\ /bin/bash Bourne-Again shell script text executable
!:mime text/x-shellscript
0 string/wt #!\ /usr/bin/bash Bourne-Again shell script text executable
!:mime text/x-shellscript
0 string/wt #!\ /usr/local/bash Bourne-Again shell script text executable
!:mime text/x-shellscript
0 string/wt #!\ /usr/local/bin/bash Bourne-Again shell script text executable
!:mime text/x-shellscript
# PHP scripts
# Ulf Harnhammar <ulfh@update.uu.se>
0 search/1/c =<?php PHP script text
!:strength + 10
!:mime text/x-php
0 search/1 =<?\n PHP script text
!:mime text/x-php
0 search/1 =<?\r PHP script text
!:mime text/x-php
0 search/1/w #!\ /usr/local/bin/php PHP script text executable
!:strength + 10
!:mime text/x-php
0 search/1/w #!\ /usr/bin/php PHP script text executable
!:strength + 10
!:mime text/x-php
# Smarty compiled template, http://www.smarty.net/
# Elan Ruusamae <glen@delfi.ee>
0 string =<?php\ /*\ Smarty\ version Smarty compiled template
>24 regex [0-9.]+ \b, version %s
!:mime text/x-php

77
magic/compress Normal file
View file

@ -0,0 +1,77 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File: compress,v 1.48 2011/12/07 18:39:43 christos Exp $
# compress: file(1) magic for pure-compression formats (no archives)
#
# compress, gzip, pack, compact, huf, squeeze, crunch, freeze, yabba, etc.
#
# Formats for various forms of compressed data
# Formats for "compress" proper have been moved into "compress.c",
# because it tries to uncompress it to figure out what's inside.
# standard unix compress
0 string \037\235 compress'd data
!:mime application/x-compress
!:apple LZIVZIVU
# gzip (GNU zip, not to be confused with Info-ZIP or PKWARE zip archiver)
# Edited by Chris Chittleborough <cchittleborough@yahoo.com.au>, March 2002
# * Original filename is only at offset 10 if "extra field" absent
# * Produce shorter output - notably, only report compression methods
# other than 8 ("deflate", the only method defined in RFC 1952).
0 string \037\213 gzip compressed data
!:mime application/x-gzip
# packed data, Huffman (minimum redundancy) codes on a byte-by-byte basis
0 string \037\036 packed data
!:mime application/octet-stream
#
# This magic number is byte-order-independent.
0 short 0x1f1f old packed data
!:mime application/octet-stream
# XXX - why *two* entries for "compacted data", one of which is
# byte-order independent, and one of which is byte-order dependent?
#
0 short 0x1fff compacted data
!:mime application/octet-stream
# This string is valid for SunOS (BE) and a matching "short" is listed
# in the Ultrix (LE) magic file.
0 string \377\037 compacted data
!:mime application/octet-stream
0 short 0145405 huf output
!:mime application/octet-stream
# bzip2
0 string BZh bzip2 compressed data
!:mime application/x-bzip2
# lzip
0 string LZIP lzip compressed data
!:mime application/x-lzip
# 7-zip archiver, from Thomas Klausner (wiz@danbala.tuwien.ac.at)
# http://www.7-zip.org or DOC/7zFormat.txt
#
0 string 7z\274\257\047\034 7-zip archive data,
>6 byte x version %d
>7 byte x \b.%d
!:mime application/x-7z-compressed
# Type: LZMA
# The low three bytes of the first little-endian long must equal 0x5d, and
# the little-endian short at offset 12 must equal 0xff, before anything is
# reported.
0 lelong&0xffffff =0x5d
>12 leshort =0xff LZMA compressed data,
# The little-endian quad at offset 5 picks the wording: all ones is reported
# as "streamed", any other value is printed as the size.
>>5 lequad =0xffffffffffffffff streamed
>>5 lequad !0xffffffffffffffff non-streamed, size %lld
!:mime application/x-lzma
# http://tukaani.org/xz/xz-file-format.txt
0 ustring \xFD7zXZ\x00 XZ compressed data
!:mime application/x-xz
# https://github.com/ckolivas/lrzip/blob/master/doc/magic.header.txt
0 string LRZI LRZIP compressed data
>4 byte x - version %d
>5 byte x \b.%d
!:mime application/x-lrzip

47
magic/database Normal file
View file

@ -0,0 +1,47 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File: database,v 1.32 2013/02/06 14:18:52 christos Exp $
# database: file(1) magic for various databases
#
# extracted from header/code files by Graeme Wilford (eep2gw@ee.surrey.ac.uk)
#
#
# GDBM magic numbers
# Will be maintained as part of the GDBM distribution in the future.
# <downsj@teeny.org>
0 belong 0x13579ace GNU dbm 1.x or ndbm database, big endian
!:mime application/x-gdbm
0 lelong 0x13579ace GNU dbm 1.x or ndbm database, little endian
!:mime application/x-gdbm
0 string GDBM GNU dbm 2.x database
!:mime application/x-gdbm
#
# Berkeley DB
#
# Ian Darwin's file /etc/magic files: big/little-endian version.
#
# Hash 1.85/1.86 databases store metadata in network byte order.
# Btree 1.85/1.86 databases store the metadata in host byte order.
# Hash and Btree 2.X and later databases store the metadata in host byte order.
0 long 0x00061561 Berkeley DB
!:mime application/x-dbm
# MS Access database
4 string Standard\ Jet\ DB Microsoft Access Database
!:mime application/x-msaccess
4 string Standard\ ACE\ DB Microsoft Access Database
!:mime application/x-msaccess
# Tokyo Cabinet magic data
# http://tokyocabinet.sourceforge.net/index.html
# Signature line; the string at offset 14 is appended in parentheses.
0 string ToKyO\ CaBiNeT\n Tokyo Cabinet
>14 string x \b (%s)
# The byte at offset 32 selects the database flavour (0-3); each value has
# its own description suffix and its own MIME type.
>32 byte 0 \b, Hash
!:mime application/x-tokyocabinet-hash
>32 byte 1 \b, B+ tree
!:mime application/x-tokyocabinet-btree
>32 byte 2 \b, Fixed-length
!:mime application/x-tokyocabinet-fixed
>32 byte 3 \b, Table
!:mime application/x-tokyocabinet-table

25
magic/diff Normal file
View file

@ -0,0 +1,25 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File: diff,v 1.13 2012/06/16 14:43:36 christos Exp $
# diff: file(1) magic for diff(1) output
#
0 search/1 diff\ diff output text
!:mime text/x-diff
0 search/1 ***\ diff output text
!:mime text/x-diff
0 search/1 Only\ in\ diff output text
!:mime text/x-diff
0 search/1 Common\ subdirectories:\ diff output text
!:mime text/x-diff
0 search/1 Index: RCS/CVS diff output text
!:mime text/x-diff
# unified diff
0 search/4096 ---\
>&0 search/1024 \n
>>&0 search/1 +++\
>>>&0 search/1024 \n
>>>>&0 search/1 @@ unified diff output text
!:mime text/x-diff
!:strength + 90

43
magic/elf Normal file
View file

@ -0,0 +1,43 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# elf: file(1) magic for ELF executables
#
# We have to check the byte order flag to see what byte order all the
# other stuff in the header is in.
#
# What're the correct byte orders for the nCUBE and the Fujitsu VPP500?
#
# Created by: unknown
# Modified by (1): Daniel Quinlan <quinlan@yggdrasil.com>
# Modified by (2): Peter Tobias <tobias@server.et-inf.fho-emden.de> (core support)
# Modified by (3): Christian 'Dr. Disk' Hechelmann <drdisk@ds9.au.s.shuttle.de> (fix of core support)
# Modified by (4): <gerardo.cacciari@gmail.com> (VMS Itanium)
# Modified by (5): Matthias Urlichs <smurf@debian.org> (Listing of many architectures)
# Top-level test: the four-byte ELF signature. All lines below refine it.
0 string \177ELF ELF
# The byte at offset 4 selects the class wording (32-bit / 64-bit).
>4 byte 0 invalid class
>4 byte 1 32-bit
>4 byte 2 64-bit
# The byte at offset 5 is the byte order flag. It decides whether the file
# type field at offset 16 is tested as a little-endian (leshort) or a
# big-endian (beshort) value; each file type carries its own MIME type.
>5 byte 0 invalid byte order
# Little-endian files: values 0-4 of the short at offset 16.
>5 byte 1 LSB
>>16 leshort 0 no file type,
!:strength *2
!:mime application/octet-stream
>>16 leshort 1 relocatable,
!:mime application/x-object
>>16 leshort 2 executable,
!:mime application/x-executable
>>16 leshort 3 shared object,
!:mime application/x-sharedlib
>>16 leshort 4 core file
!:mime application/x-coredump
# Big-endian files: same value-to-type mapping, read with beshort.
>5 byte 2 MSB
>>16 beshort 0 no file type,
!:mime application/octet-stream
>>16 beshort 1 relocatable,
!:mime application/x-object
>>16 beshort 2 executable,
!:mime application/x-executable
>>16 beshort 3 shared object,
!:mime application/x-sharedlib
>>16 beshort 4 core file,
!:mime application/x-coredump

34
magic/epoc Normal file
View file

@ -0,0 +1,34 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File: epoc,v 1.7 2009/09/19 16:28:09 christos Exp $
# EPOC : file(1) magic for EPOC documents [Psion Series 5/Osaris/Geofox 1]
# Stefan Praszalowicz <hpicollo@worldnet.fr> and Peter Breitenlohner <peb@mppmu.mpg.de>
# Useful information for improving this file can be found at:
# http://software.frodo.looijaard.name/psiconv/formats/Index.html
#------------------------------------------------------------------------------
0 lelong 0x10000037 Psion Series 5
>4 lelong 0x10000042 multi-bitmap image
!:mime image/x-epoc-mbm
>4 lelong 0x1000006D
>>8 lelong 0x1000007D Sketch image
!:mime image/x-epoc-sketch
>>8 lelong 0x1000007F Word file
!:mime application/x-epoc-word
>>8 lelong 0x10000085 OPL program (TextEd)
!:mime application/x-epoc-opl
>>8 lelong 0x10000088 Sheet file
!:mime application/x-epoc-sheet
>4 lelong 0x10000073 OPO module
!:mime application/x-epoc-opo
>4 lelong 0x10000074 OPL application
!:mime application/x-epoc-app
0 lelong 0x10000050 Psion Series 5
>4 lelong 0x1000006D database
>>8 lelong 0x10000084 Agenda file
!:mime application/x-epoc-agenda
>>8 lelong 0x10000086 Data file
!:mime application/x-epoc-data
>>8 lelong 0x10000CEA Jotter file
!:mime application/x-epoc-jotter

12
magic/filesystems Normal file
View file

@ -0,0 +1,12 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File: filesystems,v 1.76 2013/02/18 18:45:41 christos Exp $
# filesystems: file(1) magic for different filesystems
#
# CDROM Filesystems
# Modified for UDF by gerardo.cacciari@gmail.com
# "CD001" at offset 32769. The description field is printed verbatim (a "#"
# in that position does not start a comment), so it carries readable text,
# matching the wording of the raw-sector entry that follows.
32769 string CD001 ISO 9660 CD-ROM filesystem data
!:mime application/x-iso9660-image
37633 string CD001 ISO 9660 CD-ROM filesystem data (raw 2352 byte sectors)
!:mime application/x-iso9660-image

18
magic/flash Normal file
View file

@ -0,0 +1,18 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File: flash,v 1.8 2009/09/19 16:28:09 christos Exp $
# flash: file(1) magic for Macromedia Flash file format
#
# See
#
# http://www.macromedia.com/software/flash/open/
#
0 string FWS Macromedia Flash data,
>3 byte x version %d
!:mime application/x-shockwave-flash
0 string CWS Macromedia Flash data (compressed),
!:mime application/x-shockwave-flash
# From: Cal Peake <cp@absolutedigital.net>
0 string FLV Macromedia Flash Video
!:mime video/x-flv

32
magic/fonts Normal file
View file

@ -0,0 +1,32 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File: fonts,v 1.25 2013/02/06 14:18:52 christos Exp $
# fonts: file(1) magic for font data
#
# X11 font files in SNF (Server Natural Format) format
# updated by Joerg Jenderek at Feb 2013
# http://computer-programming-forum.com/51-perl/8f22fb96d2e34bab.htm
0 belong 00000004 X11 SNF font data, MSB first
#>104 belong 00000004 X11 SNF font data, MSB first
!:mime application/x-font-sfn
# GRR: line below too general as it catches also Xbase index file t3-CHAR.NDX
0 lelong 00000004
>104 lelong 00000004 X11 SNF font data, LSB first
!:mime application/x-font-sfn
# True Type fonts
0 string \000\001\000\000\000 TrueType font data
!:mime application/x-font-ttf
# Opentype font data from Avi Bercovich
0 string OTTO OpenType font data
!:mime application/vnd.ms-opentype
# Gurkan Sengun <gurkan@linuks.mine.nu>, www.linuks.mine.nu
0 string SplineFontDB: Spline Font Database
!:mime application/vnd.font-fontforge-sfd
# EOT
34 string LP Embedded OpenType (EOT)
!:mime application/vnd.ms-fontobject

7
magic/fortran Normal file
View file

@ -0,0 +1,7 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File: fortran,v 1.6 2009/09/19 16:28:09 christos Exp $
# FORTRAN source
0 regex/100 \^[Cc][\ \t] FORTRAN program
!:mime text/x-fortran
!:strength - 5

31
magic/frame Normal file
View file

@ -0,0 +1,31 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File$
# frame: file(1) magic for FrameMaker files
#
# This stuff came on a FrameMaker demo tape, most of which is
# copyright, but this file is "published" as witness the following:
#
# Note that this is the Framemaker Maker Interchange Format, not the
# Normal format which would be application/vnd.framemaker.
#
0 string \<MakerFile FrameMaker document
!:mime application/x-mif
0 string \<MIFFile FrameMaker MIF (ASCII) file
!:mime application/x-mif
0 search/1 \<MakerDictionary FrameMaker Dictionary text
!:mime application/x-mif
0 string \<MakerScreenFont FrameMaker Font file
!:mime application/x-mif
0 string \<MML FrameMaker MML file
!:mime application/x-mif
0 string \<BookFile FrameMaker Book file
!:mime application/x-mif
# XXX - this book entry should be verified, if you find one, uncomment this
#0 string \<Book\ FrameMaker Book (ASCII) file
#!:mime application/x-mif
#>6 string 3.0 (3.0)
#>6 string 2.0 (2.0)
#>6 string 1.0 (1.0)
# Spaces inside the test string must be escaped; an unescaped space ends the
# string, which would reduce the pattern to "<Maker" and push the remaining
# words into the description.
0 string \<Maker\ Intermediate\ Print\ File FrameMaker IPL file
!:mime application/x-mif

13
magic/gimp Normal file
View file

@ -0,0 +1,13 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File: gimp,v 1.6 2009/09/19 16:28:09 christos Exp $
# GIMP Gradient: file(1) magic for the GIMP's gradient data files
# by Federico Mena <federico@nuclecu.unam.mx>
#------------------------------------------------------------------------------
# XCF: file(1) magic for the XCF image format used in the GIMP developed
# by Spencer Kimball and Peter Mattis
# ('Bucky' LaDieu, nega@vt.edu)
0 string gimp\ xcf GIMP XCF image data,
!:mime image/x-xcf

23
magic/gnu Normal file
View file

@ -0,0 +1,23 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File: gnu,v 1.13 2012/01/03 17:16:54 christos Exp $
# gnu: file(1) magic for various GNU tools
#
# GNU nlsutils message catalog file format
#
# GNU message catalog (.mo and .gmo files)
# GnuPG
# The format is very similar to pgp
# Note: magic.mime had 0x8501 for the next line instead of 0x8502
0 beshort 0x8502 GPG encrypted data
!:mime text/PGP # encoding: data
# This magic is not particularly good, as the keyrings don't have true
# magic. Nevertheless, it covers many keyrings.
0 beshort 0x9901 GPG key public ring
!:mime application/x-gnupg-keyring
# gettext message catalogue
0 regex \^msgid\ GNU gettext message catalogue text
!:mime text/x-po

8
magic/gnumeric Normal file
View file

@ -0,0 +1,8 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File$
# gnumeric: file(1) magic for Gnumeric spreadsheet
# This entry is only semi-helpful, as Gnumeric compresses its files, so
# they will ordinarily be reported as "compressed", but at least -z helps
39 string =<gmr:Workbook Gnumeric spreadsheet
!:mime application/x-gnumeric

51
magic/icc Normal file
View file

@ -0,0 +1,51 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File$
# icc: file(1) magic for International Color Consortium file formats
#
# Color profiles as per the ICC's "Image technology colour management -
# Architecture, profile format, and data structure" specification.
# See
#
# http://www.color.org/specification/ICC1v43_2010-12.pdf
#
# for Specification ICC.1:2010 (Profile version 4.3.0.0).
#
# Bytes 36 to 39 contain a generic profile file signature of "acsp";
# bytes 40 to 43 "may be used to identify the primary platform/operating
# system framework for which the profile was created".
#
# There are other fields that might be worth dumping as well.
#
# This appears to be what's used for Apple ColorSync profiles.
# Instead of adding that, Apple just changed the generic "acsp" entry
# to be for "ColorSync ICC Color Profile" rather than "Kodak Color
# Management System, ICC Profile".
# Yes, it's "APPL", not "AAPL"; see the spec.
36 string acspAPPL ColorSync ICC Profile
!:mime application/vnd.iccprofile
# Microsoft ICM color profile
36 string acspMSFT Microsoft ICM Color Profile
!:mime application/vnd.iccprofile
# Yes, that's a blank after "SGI".
36 string acspSGI\ SGI ICC Profile
!:mime application/vnd.iccprofile
# XXX - is this what's used for the Sun KCMS or not? The standard file
# uses just "acsp" for that, but Apple's file uses it for "ColorSync",
# and there *is* an identified "primary platform" value of SUNW.
36 string acspSUNW Sun KCMS ICC Profile
!:mime application/vnd.iccprofile
# Any other profile.
# XXX - should we use "acsp\0\0\0\0" for "no primary platform" profiles,
# and use "acsp" for everything else and dump the "primary platform"
# string in those cases?
36 string acsp ICC Profile
!:mime application/vnd.iccprofile

21
magic/iff Normal file
View file

@ -0,0 +1,21 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File: iff,v 1.12 2009/09/19 16:28:09 christos Exp $
# iff: file(1) magic for Interchange File Format (see also "audio" & "images")
#
# Daniel Quinlan (quinlan@yggdrasil.com) -- IFF was designed by Electronic
# Arts for file interchange. It has also been used by Apple, SGI, and
# especially Commodore-Amiga.
#
# IFF files begin with an 8 byte FORM header, followed by a 4 character
# FORM type, which is followed by the first chunk in the FORM.
0 string FORM IFF data
#>4 belong x \b, FORM is %d bytes long
# audio formats
>8 string AIFF \b, AIFF audio
!:mime audio/x-aiff
>8 string AIFC \b, AIFF-C compressed audio
!:mime audio/x-aiff
>8 string 8SVX \b, 8SVX 8-bit sampled sound voice
!:mime audio/x-aiff

255
magic/images Normal file
View file

@ -0,0 +1,255 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File: images,v 1.80 2013/02/06 14:18:52 christos Exp $
# images: file(1) magic for image formats (see also "iff", and "c-lang" for
# XPM bitmaps)
#
# originally from jef@helios.ee.lbl.gov (Jef Poskanzer),
# additions by janl@ifi.uio.no as well as others. Jan also suggested
# merging several one- and two-line files into here.
#
# little magic: PCX (first byte is 0x0a)
# PBMPLUS images
# The next byte following the magic is always whitespace.
# strength is changed to try these patterns before "x86 boot sector"
0 search/1 P1
>3 regex =[0-9]*\ [0-9]* Netpbm PBM image text
>3 regex =[0-9]+\ \b, size = %sx
>>3 regex =\ [0-9]+ \b%s
!:strength + 45
!:mime image/x-portable-bitmap
0 search/1 P2
>3 regex =[0-9]*\ [0-9]* Netpbm PGM image text
>3 regex =[0-9]+\ \b, size = %sx
>>3 regex =\ [0-9]+ \b%s
!:strength + 45
!:mime image/x-portable-greymap
# Plain-text PPM. As in the P1 and P2 entries, the top-level line has no
# description of its own; the text comes from the first continuation, so it
# is not printed twice.
0 search/1 P3
>3 regex =[0-9]*\ [0-9]* Netpbm PPM image text
>3 regex =[0-9]+\ \b, size = %sx
>>3 regex =\ [0-9]+ \b%s
!:strength + 45
!:mime image/x-portable-pixmap
0 string P4
>3 regex =[0-9]*\ [0-9]* Netpbm PBM "rawbits" image data
>3 regex =[0-9]+\ \b, size = %sx
>>3 regex =\ [0-9]+ \b%s
!:strength + 45
!:mime image/x-portable-bitmap
0 string P5
>3 regex =[0-9]*\ [0-9]* Netpbm PGM "rawbits" image data
>3 regex =[0-9]+\ \b, size = %sx
>>3 regex =\ [0-9]+ \b%s
!:strength + 45
!:mime image/x-portable-greymap
0 string P6
>3 regex =[0-9]*\ [0-9]* Netpbm PPM "rawbits" image data
>3 regex =[0-9]+\ \b, size = %sx
>>3 regex =\ [0-9]+ \b%s
!:strength + 45
!:mime image/x-portable-pixmap
0 string P7 Netpbm PAM image file
!:mime image/x-portable-pixmap
# NIFF (Navy Interchange File Format, a modification of TIFF) images
# [GRR: this *must* go before TIFF]
0 string IIN1 NIFF image data
!:mime image/x-niff
# Canon RAW version 1 (CRW) files are a type of Canon Image File Format
# (CIFF) file. These are apparently all little-endian.
# From: Adam Buchbinder <adam.buchbinder@gmail.com>
# URL: http://www.sno.phy.queensu.ca/~phil/exiftool/canon_raw.html
0 string II\x1a\0\0\0HEAPCCDR Canon CIFF raw image data
!:mime image/x-canon-crw
# Canon RAW version 2 (CR2) files are a kind of TIFF with an extra magic
# number. Put this above the TIFF test to make sure we detect them.
# These are apparently all little-endian.
# From: Adam Buchbinder <adam.buchbinder@gmail.com>
# URL: http://libopenraw.freedesktop.org/wiki/Canon_CR2
0 string II\x2a\0\x10\0\0\0CR Canon CR2 raw image data
!:mime image/x-canon-cr2
# Tag Image File Format, from Daniel Quinlan (quinlan@yggdrasil.com)
# The second word of TIFF files is the TIFF version number, 42, which has
# never changed. The TIFF specification recommends testing for it.
0 string MM\x00\x2a TIFF image data, big-endian
!:mime image/tiff
0 string II\x2a\x00 TIFF image data, little-endian
!:mime image/tiff
0 string MM\x00\x2b Big TIFF image data, big-endian
!:mime image/tiff
0 string II\x2b\x00 Big TIFF image data, little-endian
!:mime image/tiff
# PNG [Portable Network Graphics, or "PNG's Not GIF"] images
# (Greg Roelofs, newt@uchicago.edu)
# (Albert Cahalan, acahalan@cs.uml.edu)
#
# 137 P N G \r \n ^Z \n [4-byte length] H E A D [HEAD data] [HEAD crc] ...
#
0 string \x89PNG\x0d\x0a\x1a\x0a PNG image data
!:mime image/png
# possible GIF replacements; none yet released!
# (Greg Roelofs, newt@uchicago.edu)
#
# GRR 950115: this was mine ("Zip GIF"):
0 string GIF94z ZIF image (GIF+deflate alpha)
!:mime image/x-unknown
#
# GRR 950115: this is Jeremy Wohl's Free Graphics Format (better):
#
0 string FGF95a FGF image (GIF+deflate beta)
!:mime image/x-unknown
#
# GRR 950115: this is Thomas Boutell's Portable Bitmap Format proposal
# (best; not yet implemented):
#
0 string PBF PBF image (deflate compression)
!:mime image/x-unknown
# GIF
0 string GIF8 GIF image data
!:mime image/gif
!:apple 8BIMGIFf
# From: Joerg Jenderek <joerg.jen.der.ek@gmx.net>
# most files with the extension .EPA and some with .BMP
0 string \x11\x06 Award BIOS Logo, 136 x 84
!:mime image/x-award-bioslogo
0 string \x11\x09 Award BIOS Logo, 136 x 126
!:mime image/x-award-bioslogo
#0 string \x07\x1f BIOS Logo corrupted?
# http://www.blackfiveservices.co.uk/awbmtools.shtml
# http://biosgfx.narod.ru/v3/
# http://biosgfx.narod.ru/abr-2/
0 string AWBM
>4 leshort <1981 Award BIOS bitmap
!:mime image/x-award-bmp
# PC bitmaps (OS/2, Windows BMP files) (Greg Roelofs, newt@uchicago.edu)
0 string BM
>14 leshort 12 PC bitmap, OS/2 1.x format
!:mime image/x-ms-bmp
>14 leshort 64 PC bitmap, OS/2 2.x format
!:mime image/x-ms-bmp
>14 leshort 40 PC bitmap, Windows 3.x format
!:mime image/x-ms-bmp
>14 leshort 128 PC bitmap, Windows NT/2000 format
!:mime image/x-ms-bmp
# XPM icons (Greg Roelofs, newt@uchicago.edu)
0 search/1 /*\ XPM\ */ X pixmap image text
!:mime image/x-xpmi
# DICOM medical imaging data
128 string DICM DICOM medical imaging data
!:mime application/dicom
# XWD - X Window Dump file.
# As described in /usr/X11R6/include/X11/XWDFile.h
# used by the xwd program.
# Bradford Castalia, idaeim, 1/01
# updated by Adam Buchbinder, 2/09
# The following assumes version 7 of the format; the first long is the length
# of the header, which is at least 25 4-byte longs, and the one at offset 8
# is a constant which is always either 1 or 2. Offset 12 is the pixmap depth,
# which is a maximum of 32.
0 belong >100
>8 belong <3
>>12 belong <33
>>>4 belong 7 XWD X Window Dump image data
!:mime image/x-xwindowdump
# PCX image files
# From: Dan Fandrich <dan@coneharvesters.com>
# updated by Joerg Jenderek at Feb 2013 by http://de.wikipedia.org/wiki/PCX
# http://web.archive.org/web/20100206055706/http://www.qzx.com/pc-gpe/pcx.txt
# GRR: original test was still too general as it catches xbase examples T5.DBT,T6.DBT with 0xa000000
# test for bytes 0x0a,version byte (0,2,3,4,5),compression byte flag(0,1), bit depth (>0) of PCX or T5.DBT,T6.DBT
0 ubelong&0xffF8fe00 0x0a000000
# for PCX bit depth > 0
>3 ubyte >0
# test for valid versions
>>1 ubyte <6
>>>1 ubyte !1 PCX
!:mime image/x-pcx
# Adobe Photoshop
# From: Asbjoern Sloth Toennesen <asbjorn@lila.io>
0 string 8BPS Adobe Photoshop Image
!:mime image/vnd.adobe.photoshop
# Summary: DjVu image / document
# Extension: .djvu
# Reference: http://djvu.org/docs/DjVu3Spec.djvu
# Submitted by: Stephane Loeuillet <stephane.loeuillet@tiscali.fr>
# Modified by (1): Abel Cheung <abelcheung@gmail.com>
0 string AT&TFORM
>12 string DJVM DjVu multiple page document
!:mime image/vnd.djvu
>12 string DJVU DjVu image or single page document
!:mime image/vnd.djvu
>12 string DJVI DjVu shared document
!:mime image/vnd.djvu
>12 string THUM DjVu page thumbnails
!:mime image/vnd.djvu
# Originally by Marc Espie
# Modified by Robert Minsk <robertminsk at yahoo.com>
# http://www.openexr.com/openexrfilelayout.pdf
0 lelong 20000630 OpenEXR image data,
!:mime image/x-exr
# SMPTE Digital Picture Exchange Format, SMPTE DPX
#
# ANSI/SMPTE 268M-1994, SMPTE Standard for File Format for Digital
# Moving-Picture Exchange (DPX), v1.0, 18 February 1994
# Robert Minsk <robertminsk at yahoo.com>
0 string SDPX DPX image data, big-endian,
!:mime image/x-dpx
#-----------------------------------------------------------------------
# Hierarchical Data Format, used to facilitate scientific data exchange
# specifications at http://hdf.ncsa.uiuc.edu/
0 belong 0x0e031301 Hierarchical Data Format (version 4) data
!:mime application/x-hdf
0 string \211HDF\r\n\032\n Hierarchical Data Format (version 5) data
!:mime application/x-hdf
# http://www.cartesianinc.com/Tech/
0 string CPC\262 Cartesian Perceptual Compression image
!:mime image/x-cpi
# Polar Monitor Bitmap (.pmb) used as logo for Polar Electro watches
# From: Markus Heidelberg <markus.heidelberg at web.de>
0 string/t [BitmapInfo2] Polar Monitor Bitmap text
!:mime image/x-polar-monitor-bitmap
# Type: Olympus ORF raw images.
# URL: http://libopenraw.freedesktop.org/wiki/Olympus_ORF
# From: Adam Buchbinder <adam.buchbinder@gmail.com>
0 string MMOR Olympus ORF raw image data, big-endian
!:mime image/x-olympus-orf
0 string IIRO Olympus ORF raw image data, little-endian
!:mime image/x-olympus-orf
0 string IIRS Olympus ORF raw image data, little-endian
!:mime image/x-olympus-orf
# Type: Foveon X3F
# URL: http://www.photofo.com/downloads/x3f-raw-format.pdf
# From: Adam Buchbinder <adam.buchbinder@gmail.com>
# Note that the MIME type isn't defined anywhere that I can find; if
# there's a canonical type for this format, it should replace this one.
0 string FOVb Foveon X3F raw image data
!:mime image/x-x3f
# Paint.NET file
# From Adam Buchbinder <adam.buchbinder@gmail.com>
0 string PDN3 Paint.NET image data
!:mime image/x-paintnet

16
magic/java Normal file
View file

@ -0,0 +1,16 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------
# $File: java,v 1.13 2011/12/08 12:12:46 rrt Exp $
# Java ByteCode and Mach-O binaries (e.g., Mac OS X) use the
# same magic number, 0xcafebabe, so they are both handled
# in the entry called "cafebabe".
#------------------------------------------------------------
0 belong 0xfeedfeed Java KeyStore
!:mime application/x-java-keystore
0 belong 0xcececece Java JCE KeyStore
!:mime application/x-java-jce-keystore
# Java source
0 regex ^import.*;$ Java source
!:mime text/x-java

17
magic/javascript Normal file
View file

@ -0,0 +1,17 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File: $
# javascript: magic for javascript and node.js scripts.
#
0 search/1/w #!/bin/node Node.js script text executable
!:mime application/javascript
0 search/1/w #!/usr/bin/node Node.js script text executable
!:mime application/javascript
0 search/1/w #!/bin/nodejs Node.js script text executable
!:mime application/javascript
0 search/1/w #!/usr/bin/nodejs Node.js script text executable
!:mime application/javascript
0 search/1 #!/usr/bin/env\ node Node.js script text executable
!:mime application/javascript
0 search/1 #!/usr/bin/env\ nodejs Node.js script text executable
!:mime application/javascript

31
magic/jpeg Normal file
View file

@ -0,0 +1,31 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File: jpeg,v 1.18 2012/08/01 12:12:36 christos Exp $
# JPEG images
# SunOS 5.5.1 had
#
# 0 string \377\330\377\340 JPEG file
# 0 string \377\330\377\356 JPG file
#
# both of which turn into "JPEG image data" here.
#
0 beshort 0xffd8 JPEG image data
!:mime image/jpeg
!:apple 8BIMJPEG
!:strength +2
# From: David Santinoli <david@santinoli.com>
0 string \x00\x00\x00\x0C\x6A\x50\x20\x20\x0D\x0A\x87\x0A JPEG 2000
# From: Johan van der Knijff <johan.vanderknijff@kb.nl>
# Added sub-entries for JP2, JPX, JPM and MJ2 formats; added mimetypes
# https://github.com/bitsgalore/jp2kMagic
#
# Now read value of 'Brand' field, which yields a few possibilities:
>20 string \x6a\x70\x32\x20 Part 1 (JP2)
!:mime image/jp2
>20 string \x6a\x70\x78\x20 Part 2 (JPX)
!:mime image/jpx
>20 string \x6a\x70\x6d\x20 Part 6 (JPM)
!:mime image/jpm
>20 string \x6d\x6a\x70\x32 Part 3 (MJ2)
!:mime video/mj2

11
magic/kde Normal file
View file

@ -0,0 +1,11 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File: kde,v 1.4 2009/09/19 16:28:10 christos Exp $
# kde: file(1) magic for KDE
0 string/t [KDE\ Desktop\ Entry] KDE desktop entry
!:mime application/x-kdelnk
0 string/t #\ KDE\ Config\ File KDE config file
!:mime application/x-kdelnk
0 string/t #\ xmcd xmcd database file for kscd
!:mime text/x-xmcd

30
magic/kml Normal file
View file

@ -0,0 +1,30 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File: kml,v 1.2 2009/09/19 16:28:10 christos Exp $
# Type: Google KML, formerly Keyhole Markup Language
# Future development of this format has been handed
# over to the Open Geospatial Consortium.
# http://www.opengeospatial.org/standards/kml/
# From: Asbjoern Sloth Toennesen <asbjorn@lila.io>
0 string/t \<?xml
>20 search/400 \ xmlns=
>>&0 regex ['"]http://earth.google.com/kml Google KML document
!:mime application/vnd.google-earth.kml+xml
#------------------------------------------------------------------------------
# Type: OpenGIS KML, formerly Keyhole Markup Language
# This standard is maintained by the
# Open Geospatial Consortium.
# http://www.opengeospatial.org/standards/kml/
# From: Asbjoern Sloth Toennesen <asbjorn@lila.io>
>>&0 regex ['"]http://www.opengis.net/kml OpenGIS KML document
!:mime application/vnd.google-earth.kml+xml
#------------------------------------------------------------------------------
# Type: Google KML Archive (ZIP based)
# http://code.google.com/apis/kml/documentation/kml_tut.html
# From: Asbjoern Sloth Toennesen <asbjorn@lila.io>
0 string PK\003\004
>4 byte 0x14
>>30 string doc.kml Compressed Google KML Document, including resources.
!:mime application/vnd.google-earth.kmz

22
magic/linux Normal file
View file

@ -0,0 +1,22 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File: linux,v 1.46 2013/01/06 21:26:48 christos Exp $
# linux: file(1) magic for Linux files
#
# Values for Linux/i386 binaries, from Daniel Quinlan <quinlan@yggdrasil.com>
# The following basic Linux magic is useful for reference, but using
# "long" magic is a better practice in order to avoid collisions.
#
# 2 leshort 100 Linux/i386
# >0 leshort 0407 impure executable (OMAGIC)
# >0 leshort 0410 pure executable (NMAGIC)
# >0 leshort 0413 demand-paged executable (ZMAGIC)
# >0 leshort 0314 demand-paged executable (QMAGIC)
#
# SYSLINUX boot logo files (from 'ppmtolss16' sources)
# http://www.syslinux.org/wiki/index.php/SYSLINUX#Display_graphic_from_filename:
# file extension .lss .16
0 lelong =0x1413f33d SYSLINUX' LSS16 image data
# syslinux-4.05/mime/image/x-lss16.xml
!:mime image/x-lss16

42
magic/lisp Normal file
View file

@ -0,0 +1,42 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File$
# lisp: file(1) magic for lisp programs
#
# various lisp types, from Daniel Quinlan (quinlan@yggdrasil.com)
# updated by Joerg Jenderek
# GRR: This lot is too weak
#0 string ;;
# windows INF files often begin with semicolon and use CRLF as line end
# lisp files are mainly created on unix system with LF as line end
#>2 search/4096 !\r Lisp/Scheme program text
#>2 search/4096 \r Windows INF file
0 search/4096 (setq\ Lisp/Scheme program text
!:mime text/x-lisp
0 search/4096 (defvar\ Lisp/Scheme program text
!:mime text/x-lisp
0 search/4096 (defparam\ Lisp/Scheme program text
!:mime text/x-lisp
0 search/4096 (defun\ Lisp/Scheme program text
!:mime text/x-lisp
0 search/4096 (autoload\ Lisp/Scheme program text
!:mime text/x-lisp
0 search/4096 (custom-set-variables\ Lisp/Scheme program text
!:mime text/x-lisp
# Emacs 18 - this is always correct, but not very magical.
0 string \012( Emacs v18 byte-compiled Lisp data
!:mime application/x-elc
# Emacs 19+ - ver. recognition added by Ian Springer
# Also applies to XEmacs 19+ .elc files; could tell them apart with regexs
# - Chris Chittleborough <cchittleborough@yahoo.com.au>
# The version byte at offset 4 must lie in 19..31.  The upper-bound test is
# nested under the lower-bound test so that both have to hold before the
# description and MIME type are emitted.
0 string ;ELC
>4 byte >18
>>4 byte <32 Emacs/XEmacs v%d byte-compiled Lisp data
!:mime application/x-elc
# From: David Allouche <david@allouche.net>
0 search/1 \<TeXmacs| TeXmacs document text
!:mime text/texmacs

17
magic/lua Normal file
View file

@ -0,0 +1,17 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File: lua,v 1.5 2009/09/19 16:28:10 christos Exp $
# lua: file(1) magic for Lua scripting language
# URL: http://www.lua.org/
# From: Reuben Thomas <rrt@sc3d.org>, Seo Sanghyeon <tinuviel@sparcs.kaist.ac.kr>
# Lua scripts
0 search/1/w #!\ /usr/bin/lua Lua script text executable
!:mime text/x-lua
0 search/1/w #!\ /usr/local/bin/lua Lua script text executable
!:mime text/x-lua
0 search/1 #!/usr/bin/env\ lua Lua script text executable
!:mime text/x-lua
0 search/1 #!\ /usr/bin/env\ lua Lua script text executable
!:mime text/x-lua

7
magic/m4 Normal file
View file

@ -0,0 +1,7 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File$
# m4: file(1) magic for M4 scripts
#
0 regex \^dnl\ M4 macro processor script text
!:mime text/x-m4

21
magic/macintosh Normal file
View file

@ -0,0 +1,21 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File: macintosh,v 1.21 2010/09/20 19:19:17 rrt Exp $
# macintosh description
#
# BinHex is the Macintosh ASCII-encoded file format (see also "apple")
# Daniel Quinlan, quinlan@yggdrasil.com
11 string must\ be\ converted\ with\ BinHex BinHex binary text
!:mime application/mac-binhex40
# Stuffit archives are the de facto standard of compression for Macintosh
# files obtained from most archives. (franklsm@tuns.ca)
0 string SIT! StuffIt Archive (data)
!:mime application/x-stuffit
!:apple SIT!SIT!
# Newer StuffIt archives (grant@netbsd.org)
0 string StuffIt StuffIt Archive
!:mime application/x-stuffit
!:apple SIT!SIT!
#>162 string >0 : %s

35
magic/mail.news Normal file
View file

@ -0,0 +1,35 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File: mail.news,v 1.21 2012/06/21 01:44:52 christos Exp $
# mail.news: file(1) magic for mail and news
#
# Unfortunately, saved netnews also has From line added in some news software.
#0 string From mail text
0 string/t Relay-Version: old news text
!:mime message/rfc822
0 string/t #!\ rnews batched news text
!:mime message/rfc822
0 string/t N#!\ rnews mailed, batched news text
!:mime message/rfc822
0 string/t Forward\ to mail forwarding text
!:mime message/rfc822
0 string/t Pipe\ to mail piping text
!:mime message/rfc822
0 string/tc delivered-to: SMTP mail text
!:mime message/rfc822
0 string/tc return-path: SMTP mail text
!:mime message/rfc822
0 string/t Path: news text
!:mime message/news
0 string/t Xref: news text
!:mime message/news
0 string/t From: news or mail text
!:mime message/rfc822
0 string/t Article saved news text
!:mime message/news
0 string/t Received: RFC 822 mail text
!:mime message/rfc822
# TNEF files...
0 lelong 0x223E9F78 Transport Neutral Encapsulation Format
!:mime application/vnd.ms-tnef

16
magic/make Normal file
View file

@ -0,0 +1,16 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File$
# make: file(1) magic for makefiles
#
0 regex \^CFLAGS makefile script text
!:mime text/x-makefile
0 regex \^LDFLAGS makefile script text
!:mime text/x-makefile
0 regex \^all: makefile script text
!:mime text/x-makefile
# The dot is escaped so that only a literal ".PRECIOUS" target matches,
# not any character followed by "PRECIOUS".
0 regex \^\\.PRECIOUS makefile script text
!:mime text/x-makefile
0 regex \^SUBDIRS automake makefile script text
!:mime text/x-makefile

29
magic/marc21 Normal file
View file

@ -0,0 +1,29 @@
# See COPYING file in this directory for original libmagic copyright.
#--------------------------------------------
# marc21: file(1) magic for MARC 21 Format
#
# Kevin Ford (kefo@loc.gov)
#
# MARC21 formats are for the representation and communication
# of bibliographic and related information in machine-readable
# form. For more info, see http://www.loc.gov/marc/
# leader position 20-21 must be 45
20 string 45
# leader starts with 5 digits, followed by codes specific to MARC format
# Every record-type test below is a level-1 continuation of the "45" check
# above, so none of them is evaluated unless that check matched.
>0 regex/1 (^[0-9]{5})[acdnp][^bhlnqsu-z] MARC21 Bibliographic
!:mime application/marc
>0 regex/1 (^[0-9]{5})[acdnosx][z] MARC21 Authority
!:mime application/marc
>0 regex/1 (^[0-9]{5})[cdn][uvxy] MARC21 Holdings
!:mime application/marc
>0 regex/1 (^[0-9]{5})[acdn][w] MARC21 Classification
!:mime application/marc
>0 regex/1 (^[0-9]{5})[cdn][q] MARC21 Community
!:mime application/marc
# leader position 22-23, should be "00" but is it?
>0 regex/1 (^.{21})([^0]{2}) (non-conforming)
!:mime application/marc

17
magic/matroska Normal file
View file

@ -0,0 +1,17 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File: matroska,v 1.7 2012/08/26 10:06:15 christos Exp $
# matroska: file(1) magic for Matroska files
#
# See http://www.matroska.org/
#
# EBML id:
0 belong 0x1a45dfa3
# DocType id:
>4 search/4096 \x42\x82
# DocType contents:
>>&1 string webm WebM
!:mime video/webm
>>&1 string matroska Matroska data
!:mime video/x-matroska

9
magic/misctools Normal file
View file

@ -0,0 +1,9 @@
# See COPYING file in this directory for original libmagic copyright.
#-----------------------------------------------------------------------------
# $File: misctools,v 1.12 2010/09/29 18:36:49 rrt Exp $
# misctools: file(1) magic for miscellaneous UNIX tools.
#
0 string/c BEGIN:VCALENDAR vCalendar calendar file
!:mime text/calendar
0 string/c BEGIN:VCARD vCard visiting card
!:mime text/x-vcard

368
magic/msdos Normal file
View file

@ -0,0 +1,368 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File: msdos,v 1.84 2013/02/05 13:55:22 christos Exp $
# msdos: file(1) magic for MS-DOS files
#
# .BAT files (Daniel Quinlan, quinlan@yggdrasil.com)
# updated by Joerg Jenderek at Oct 2008,Apr 2011
0 string/t @
>1 string/cW \ echo\ off DOS batch file text
!:mime text/x-msdos-batch
>1 string/cW echo\ off DOS batch file text
!:mime text/x-msdos-batch
>1 string/cW rem DOS batch file text
!:mime text/x-msdos-batch
>1 string/cW set\ DOS batch file text
!:mime text/x-msdos-batch
# Tests for various EXE types.
#
# Many of the compressed formats were extracted from IDARC 1.23 source code.
#
0 string/b MZ DOS MZ
!:mime application/x-dosexec
# All non-DOS EXE extensions have the relocation table more than 0x40 bytes into the file.
>0x18 leshort <0x40 MS-DOS executable
# These traditional tests usually work but not always. When test quality support is
# implemented these can be turned on.
#>>0x18 leshort 0x1c (Borland compiler)
#>>0x18 leshort 0x1e (MS compiler)
# If the relocation table is 0x40 or more bytes into the file, it's definitely
# not a DOS EXE.
>0x18 leshort >0x3f
# Maybe it's a PE?
>>(0x3c.l) string PE\0\0 PE
>>>(0x3c.l+24) leshort 0x010b \b32 executable
>>>(0x3c.l+24) leshort 0x020b \b32+ executable
>>>(0x3c.l+24) leshort 0x0107 ROM image
>>>(0x3c.l+24) default x Unknown PE signature
>>>>&0 leshort x 0x%x
>>>(0x3c.l+22) leshort&0x2000 >0 (DLL)
>>>(0x3c.l+92) leshort 1 (native)
>>>(0x3c.l+92) leshort 2 (GUI)
>>>(0x3c.l+92) leshort 3 (console)
>>>(0x3c.l+92) leshort 7 (POSIX)
>>>(0x3c.l+92) leshort 9 (Windows CE)
>>>(0x3c.l+92) leshort 10 (EFI application)
>>>(0x3c.l+92) leshort 11 (EFI boot service driver)
>>>(0x3c.l+92) leshort 12 (EFI runtime driver)
>>>(0x3c.l+92) leshort 13 (EFI ROM)
>>>(0x3c.l+92) leshort 14 (XBOX)
>>>(0x3c.l+92) leshort 15 (Windows boot application)
>>>(0x3c.l+92) default x (Unknown subsystem
>>>>&0 leshort x 0x%x)
>>>(0x3c.l+4) leshort 0x14c Intel 80386
>>>(0x3c.l+4) leshort 0x166 MIPS R4000
>>>(0x3c.l+4) leshort 0x168 MIPS R10000
>>>(0x3c.l+4) leshort 0x184 Alpha
>>>(0x3c.l+4) leshort 0x1a2 Hitachi SH3
>>>(0x3c.l+4) leshort 0x1a6 Hitachi SH4
>>>(0x3c.l+4) leshort 0x1c0 ARM
>>>(0x3c.l+4) leshort 0x1c2 ARM Thumb
>>>(0x3c.l+4) leshort 0x1c4 ARMv7 Thumb
>>>(0x3c.l+4) leshort 0x1f0 PowerPC
>>>(0x3c.l+4) leshort 0x200 Intel Itanium
>>>(0x3c.l+4) leshort 0x266 MIPS16
>>>(0x3c.l+4) leshort 0x268 Motorola 68000
>>>(0x3c.l+4) leshort 0x290 PA-RISC
>>>(0x3c.l+4) leshort 0x366 MIPSIV
>>>(0x3c.l+4) leshort 0x466 MIPS16 with FPU
>>>(0x3c.l+4) leshort 0xebc EFI byte code
>>>(0x3c.l+4) leshort 0x8664 x86-64
>>>(0x3c.l+4) leshort 0xc0ee MSIL
>>>(0x3c.l+4) default x Unknown processor type
>>>>&0 leshort x 0x%x
>>>(0x3c.l+22) leshort&0x0200 >0 (stripped to external PDB)
>>>(0x3c.l+22) leshort&0x1000 >0 system file
>>>(0x3c.l+24) leshort 0x010b
>>>>(0x3c.l+232) lelong >0 Mono/.Net assembly
>>>(0x3c.l+24) leshort 0x020b
>>>>(0x3c.l+248) lelong >0 Mono/.Net assembly
# hooray, there's a DOS extender using the PE format, with a valid PE
# executable inside (which just prints a message and exits if run in win)
>>>(8.s*16) string 32STUB \b, 32rtm DOS extender
>>>(8.s*16) string !32STUB \b, for MS Windows
>>>(0x3c.l+0xf8) string UPX0 \b, UPX compressed
>>>(0x3c.l+0xf8) search/0x140 PEC2 \b, PECompact2 compressed
>>>(0x3c.l+0xf8) search/0x140 UPX2
>>>>(&0x10.l+(-4)) string PK\3\4 \b, ZIP self-extracting archive (Info-Zip)
>>>(0x3c.l+0xf8) search/0x140 .idata
>>>>(&0xe.l+(-4)) string PK\3\4 \b, ZIP self-extracting archive (Info-Zip)
>>>>(&0xe.l+(-4)) string ZZ0 \b, ZZip self-extracting archive
>>>>(&0xe.l+(-4)) string ZZ1 \b, ZZip self-extracting archive
>>>(0x3c.l+0xf8) search/0x140 .rsrc
>>>>(&0x0f.l+(-4)) string a\\\4\5 \b, WinHKI self-extracting archive
>>>>(&0x0f.l+(-4)) string Rar! \b, RAR self-extracting archive
>>>>(&0x0f.l+(-4)) search/0x3000 MSCF \b, InstallShield self-extracting archive
>>>>(&0x0f.l+(-4)) search/32 Nullsoft \b, Nullsoft Installer self-extracting archive
>>>(0x3c.l+0xf8) search/0x140 .data
>>>>(&0x0f.l) string WEXTRACT \b, MS CAB-Installer self-extracting archive
>>>(0x3c.l+0xf8) search/0x140 .petite\0 \b, Petite compressed
>>>>(0x3c.l+0xf7) byte x
>>>>>(&0x104.l+(-4)) string =!sfx! \b, ACE self-extracting archive
>>>(0x3c.l+0xf8) search/0x140 .WISE \b, WISE installer self-extracting archive
>>>(0x3c.l+0xf8) search/0x140 .dz\0\0\0 \b, Dzip self-extracting archive
>>>&(0x3c.l+0xf8) search/0x100 _winzip_ \b, ZIP self-extracting archive (WinZip)
>>>&(0x3c.l+0xf8) search/0x100 SharedD \b, Microsoft Installer self-extracting archive
>>>0x30 string Inno \b, InnoSetup self-extracting archive
# Hmm, not a PE but the relocation table is too high for a traditional DOS exe,
# must be one of the unusual subformats.
>>(0x3c.l) string !PE\0\0 MS-DOS executable
>>(0x3c.l) string NE \b, NE
>>>(0x3c.l+0x36) byte 1 for OS/2 1.x
>>>(0x3c.l+0x36) byte 2 for MS Windows 3.x
>>>(0x3c.l+0x36) byte 3 for MS-DOS
>>>(0x3c.l+0x36) byte 4 for Windows 386
>>>(0x3c.l+0x36) byte 5 for Borland Operating System Services
>>>(0x3c.l+0x36) default x
>>>>(0x3c.l+0x36) byte x (unknown OS %x)
>>>(0x3c.l+0x36) byte 0x81 for MS-DOS, Phar Lap DOS extender
>>>(0x3c.l+0x0c) leshort&0x8003 0x8002 (DLL)
>>>(0x3c.l+0x0c) leshort&0x8003 0x8001 (driver)
>>>&(&0x24.s-1) string ARJSFX \b, ARJ self-extracting archive
>>>(0x3c.l+0x70) search/0x80 WinZip(R)\ Self-Extractor \b, ZIP self-extracting archive (WinZip)
>>(0x3c.l) string LX\0\0 \b, LX
>>>(0x3c.l+0x0a) leshort <1 (unknown OS)
>>>(0x3c.l+0x0a) leshort 1 for OS/2
>>>(0x3c.l+0x0a) leshort 2 for MS Windows
>>>(0x3c.l+0x0a) leshort 3 for DOS
>>>(0x3c.l+0x0a) leshort >3 (unknown OS)
>>>(0x3c.l+0x10) lelong&0x28000 =0x8000 (DLL)
>>>(0x3c.l+0x10) lelong&0x20000 >0 (device driver)
>>>(0x3c.l+0x10) lelong&0x300 0x300 (GUI)
>>>(0x3c.l+0x10) lelong&0x28300 <0x300 (console)
>>>(0x3c.l+0x08) leshort 1 i80286
>>>(0x3c.l+0x08) leshort 2 i80386
>>>(0x3c.l+0x08) leshort 3 i80486
>>>(8.s*16) string emx \b, emx
>>>>&1 string x %s
>>>&(&0x54.l-3) string arjsfx \b, ARJ self-extracting archive
# MS Windows system file, supposedly a collection of LE executables
>>(0x3c.l) string W3 \b, W3 for MS Windows
>>(0x3c.l) string LE\0\0 \b, LE executable
>>>(0x3c.l+0x0a) leshort 1
# some DOS extenders use LE files with OS/2 header
>>>>0x240 search/0x100 DOS/4G for MS-DOS, DOS4GW DOS extender
>>>>0x240 search/0x200 WATCOM\ C/C++ for MS-DOS, DOS4GW DOS extender
>>>>0x440 search/0x100 CauseWay\ DOS\ Extender for MS-DOS, CauseWay DOS extender
>>>>0x40 search/0x40 PMODE/W for MS-DOS, PMODE/W DOS extender
>>>>0x40 search/0x40 STUB/32A for MS-DOS, DOS/32A DOS extender (stub)
>>>>0x40 search/0x80 STUB/32C for MS-DOS, DOS/32A DOS extender (configurable stub)
>>>>0x40 search/0x80 DOS/32A for MS-DOS, DOS/32A DOS extender (embedded)
# this is a wild guess; hopefully it is a specific signature
>>>>&0x24 lelong <0x50
>>>>>(&0x4c.l) string \xfc\xb8WATCOM
>>>>>>&0 search/8 3\xdbf\xb9 \b, 32Lite compressed
# another wild guess: if real OS/2 LE executables exist, they probably have higher start EIP
#>>>>(0x3c.l+0x1c) lelong >0x10000 for OS/2
# fails with DOS-Extenders.
>>>(0x3c.l+0x0a) leshort 2 for MS Windows
>>>(0x3c.l+0x0a) leshort 3 for DOS
>>>(0x3c.l+0x0a) leshort 4 for MS Windows (VxD)
>>>(&0x7c.l+0x26) string UPX \b, UPX compressed
>>>&(&0x54.l-3) string UNACE \b, ACE self-extracting archive
# looks like ASCII, probably some embedded copyright message.
# and definitely not NE/LE/LX/PE
>>0x3c lelong >0x20000000
>>>(4.s*512) leshort !0x014c \b, MZ for MS-DOS
# header data too small for extended executable
>2 long !0
>>0x18 leshort <0x40
>>>(4.s*512) leshort !0x014c
>>>>&(2.s-514) string !LE
>>>>>&-2 string !BW \b, MZ for MS-DOS
>>>>&(2.s-514) string LE \b, LE
>>>>>0x240 search/0x100 DOS/4G for MS-DOS, DOS4GW DOS extender
# educated guess since indirection is still not capable enough for complex offset
# calculations (next embedded executable would be at &(&2*512+&0-2))
# I suspect there are only LE executables in these multi-exe files
>>>>&(2.s-514) string BW
>>>>>0x240 search/0x100 DOS/4G ,\b LE for MS-DOS, DOS4GW DOS extender (embedded)
>>>>>0x240 search/0x100 !DOS/4G ,\b BW collection for MS-DOS
# This sequence skips to the first COFF segment, usually .text
>(4.s*512) leshort 0x014c \b, COFF
>>(8.s*16) string go32stub for MS-DOS, DJGPP go32 DOS extender
>>(8.s*16) string emx
>>>&1 string x for DOS, Win or OS/2, emx %s
>>&(&0x42.l-3) byte x
>>>&0x26 string UPX \b, UPX compressed
# and yet another guess: small .text, and after large .data is unusual, could be 32lite
>>&0x2c search/0xa0 .text
>>>&0x0b lelong <0x2000
>>>>&0 lelong >0x6000 \b, 32lite compressed
>(8.s*16) string $WdX \b, WDos/X DOS extender
# By now an executable type should have been printed out. The executable
# may be a self-uncompressing archive, so look for evidence of that and
# print it out.
#
# Some signatures below from Greg Roelofs, newt@uchicago.edu.
#
>0x35 string \x8e\xc0\xb9\x08\x00\xf3\xa5\x4a\x75\xeb\x8e\xc3\x8e\xd8\x33\xff\xbe\x30\x00\x05 \b, aPack compressed
>0xe7 string LH/2\ Self-Extract \b, %s
>0x1c string UC2X \b, UCEXE compressed
>0x1c string WWP\ \b, WWPACK compressed
>0x1c string RJSX \b, ARJ self-extracting archive
>0x1c string diet \b, diet compressed
>0x1c string LZ09 \b, LZEXE v0.90 compressed
>0x1c string LZ91 \b, LZEXE v0.91 compressed
>0x1c string tz \b, TinyProg compressed
>0x1e string Copyright\ 1989-1990\ PKWARE\ Inc. Self-extracting PKZIP archive
!:mime application/zip
# Yes, this really is "Copr", not "Corp."
>0x1e string PKLITE\ Copr. Self-extracting PKZIP archive
!:mime application/zip
# winarj stores a message in the stub instead of the sig in the MZ header
>0x20 search/0xe0 aRJsfX \b, ARJ self-extracting archive
>0x20 string AIN
>>0x23 string 2 \b, AIN 2.x compressed
>>0x23 string <2 \b, AIN 1.x compressed
>>0x23 string >2 \b, AIN 1.x compressed
>0x24 string LHa's\ SFX \b, LHa self-extracting archive
!:mime application/x-lha
>0x24 string LHA's\ SFX \b, LHa self-extracting archive
!:mime application/x-lha
>0x24 string \ $ARX \b, ARX self-extracting archive
>0x24 string \ $LHarc \b, LHarc self-extracting archive
>0x20 string SFX\ by\ LARC \b, LARC self-extracting archive
>0x40 string aPKG \b, aPackage self-extracting archive
>0x64 string W\ Collis\0\0 \b, Compack compressed
>0x7a string Windows\ self-extracting\ ZIP \b, ZIP self-extracting archive
>>&0xf4 search/0x140 \x0\x40\x1\x0
>>>(&0.l+(4)) string MSCF \b, WinHKI CAB self-extracting archive
>1638 string -lh5- \b, LHa self-extracting archive v2.13S
>0x17888 string Rar! \b, RAR self-extracting archive
# Skip to the end of the EXE. This will usually work fine in the PE case
# because the MZ image is hardcoded into the toolchain and almost certainly
# won't match any of these signatures.
>(4.s*512) long x
>>&(2.s-517) byte x
>>>&0 string PK\3\4 \b, ZIP self-extracting archive
>>>&0 string Rar! \b, RAR self-extracting archive
>>>&0 string =!\x11 \b, AIN 2.x self-extracting archive
>>>&0 string =!\x12 \b, AIN 2.x self-extracting archive
>>>&0 string =!\x17 \b, AIN 1.x self-extracting archive
>>>&0 string =!\x18 \b, AIN 1.x self-extracting archive
>>>&7 search/400 **ACE** \b, ACE self-extracting archive
>>>&0 search/0x480 UC2SFX\ Header \b, UC2 self-extracting archive
# a few unknown ZIP sfxes, no idea if they are needed or if they are
# already captured by the generic patterns above
>(8.s*16) search/0x20 PKSFX \b, ZIP self-extracting archive (PKZIP)
# TODO: how to add this? >FileSize-34 string Windows\ Self-Installing\ Executable \b, ZIP self-extracting archive
#
# TELVOX Teleinformatica CODEC self-extractor for OS/2:
>49801 string \x79\xff\x80\xff\x76\xff \b, CODEC archive v3.21
>>49824 leshort =1 \b, 1 file
>>49824 leshort >1 \b, %u files
# Popular applications
2080 string Microsoft\ Word\ 6.0\ Document %s
!:mime application/msword
2080 string Documento\ Microsoft\ Word\ 6 Spanish Microsoft Word 6 document data
!:mime application/msword
# Pawel Wiecek <coven@i17linuxb.ists.pwr.wroc.pl> (for polish Word)
2112 string MSWordDoc Microsoft Word document data
!:mime application/msword
#
0 belong 0x31be0000 Microsoft Word Document
!:mime application/msword
#
0 string/b PO^Q` Microsoft Word 6.0 Document
!:mime application/msword
#
0 string/b \376\067\0\043 Microsoft Office Document
!:mime application/msword
0 string/b \333\245-\0\0\0 Microsoft Office Document
!:mime application/msword
512 string/b \354\245\301 Microsoft Word Document
!:mime application/msword
#
0 string/b \xDB\xA5\x2D\x00 Microsoft WinWord 2.0 Document
!:mime application/msword
#
# Excel 5.0 worksheets carry their application name at offset 2080; the
# matched string itself is printed as the description (%s).
2080 string Microsoft\ Excel\ 5.0\ Worksheet %s
!:mime application/vnd.ms-excel
#
# Same test for the Italian-language application name.
2080 string Foglio\ di\ lavoro\ Microsoft\ Exce %s
!:mime application/vnd.ms-excel
#
# Pawel Wiecek <coven@i17linuxb.ists.pwr.wroc.pl> (for polish Excel)
2114 string Biff5 Microsoft Excel 5.0 Worksheet
!:mime application/vnd.ms-excel
# Italian MS-Excel
2121 string Biff5 Microsoft Excel 5.0 Worksheet
!:mime application/vnd.ms-excel
0 string/b \x09\x04\x06\x00\x00\x00\x10\x00 Microsoft Excel Worksheet
!:mime application/vnd.ms-excel
#
0 belong 0x00001a00 Lotus 1-2-3
!:mime application/x-123
#
0 belong 0x00000200 Lotus 1-2-3
!:mime application/x-123
0 string/b WordPro\0 Lotus WordPro
!:mime application/vnd.lotus-wordpro
0 string/b WordPro\r\373 Lotus WordPro
!:mime application/vnd.lotus-wordpro
# Windows icons (Ian Springer <ips@fpk.hp.com>)
0 string/b \000\000\001\000 MS Windows icon resource
!:mime image/x-icon
# .PIF files added by Joerg Jenderek from http://smsoft.ru/en/pifdoc.htm
# only for windows versions equal or greater 3.0
0x171 string MICROSOFT\ PIFEX\0 Windows Program Information File
!:mime application/x-dosexec
# TNEF magic From "Joomy" <joomy@se-ed.net>
# Microsoft Outlook's Transport Neutral Encapsulation Format (TNEF)
# The signature 0x223e9f78 is a 32-bit value, so it is tested as a
# little-endian long; a 16-bit leshort cannot hold it.
0 lelong 0x223e9f78 TNEF
!:mime application/vnd.ms-tnef
#------------------------------------------------------------------------------
# From Stuart Caie <kyzer@4u.net> (developer of cabextract)
# Microsoft Cabinet files
0 string/b MSCF\0\0\0\0 Microsoft Cabinet archive data
!:mime application/vnd.ms-cab-compressed
# from http://filext.com by Derek M Jones <derek@knosof.co.uk>
# False positive with PPT (also currently this string is too long)
#0 string/b \xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x3E\x00\x03\x00\xFE\xFF\x09\x00\x06 Microsoft Installer
0 string/b \320\317\021\340\241\261\032\341 Microsoft Office Document
#>48 byte 0x1B Excel Document
#!:mime application/vnd.ms-excel
>546 string bjbj Microsoft Word Document
!:mime application/msword
>546 string jbjb Microsoft Word Document
!:mime application/msword
0 string/b \224\246\056 Microsoft Word Document
!:mime application/msword
512 string R\0o\0o\0t\0\ \0E\0n\0t\0r\0y Microsoft Word Document
!:mime application/msword
# MS eBook format (.lit)
0 string/b ITOLITLS Microsoft Reader eBook Data
>8 lelong x \b, version %u
!:mime application/x-ms-reader

12
magic/neko Normal file
View file

@ -0,0 +1,12 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------
# $File: java,v 1.12 2009/09/19 16:28:10 christos Exp $
# From: Mikhail Gusarov <dottedmag@dottedmag.net>
# NekoVM (http://nekovm.org/) bytecode
0 string NEKO NekoVM bytecode
>4 lelong x (%d global symbols,
>8 lelong x %d global fields,
>12 lelong x %d bytecode ops)
!:mime application/x-nekovm-bytecode

11
magic/pascal Normal file
View file

@ -0,0 +1,11 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File$
# pascal: file(1) magic for Pascal source
#
0 search/8192 (input, Pascal source text
!:mime text/x-pascal
0 regex \^program Pascal source text
!:mime text/x-pascal
0 regex \^record Pascal source text
!:mime text/x-pascal

8
magic/pdf Normal file
View file

@ -0,0 +1,8 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File$
# pdf: file(1) magic for Portable Document Format
#
0 string %PDF- PDF document
!:mime application/pdf

26
magic/perl Normal file
View file

@ -0,0 +1,26 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File: perl,v 1.19 2012/06/20 21:16:25 christos Exp $
# perl: file(1) magic for Larry Wall's perl language.
#
# The `eval' lines recognize an outrageously clever hack.
# Keith Waclena <keith@cerberus.uchicago.edu>
# Send additions to <perl5-porters@perl.org>
0 search/1/w #!\ /bin/perl Perl script text executable
!:mime text/x-perl
0 search/1 eval\ "exec\ /bin/perl Perl script text
!:mime text/x-perl
0 search/1/w #!\ /usr/bin/perl Perl script text executable
!:mime text/x-perl
0 search/1 eval\ "exec\ /usr/bin/perl Perl script text
!:mime text/x-perl
0 search/1/w #!\ /usr/local/bin/perl Perl script text executable
!:mime text/x-perl
0 search/1 eval\ "exec\ /usr/local/bin/perl Perl script text
!:mime text/x-perl
0 search/1 eval\ '(exit\ $?0)'\ &&\ eval\ 'exec Perl script text
!:mime text/x-perl
0 search/1 #!/usr/bin/env\ perl Perl script text executable
!:mime text/x-perl
0 search/1 #!\ /usr/bin/env\ perl Perl script text executable
!:mime text/x-perl

27
magic/pgp Normal file
View file

@ -0,0 +1,27 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File$
# pgp: file(1) magic for Pretty Good Privacy
# see http://lists.gnupg.org/pipermail/gnupg-devel/1999-September/016052.html
#
0 beshort 0x9900 PGP key public ring
!:mime application/x-pgp-keyring
0 beshort 0x9501 PGP key security ring
!:mime application/x-pgp-keyring
0 beshort 0x9500 PGP key security ring
!:mime application/x-pgp-keyring
0 beshort 0xa600 PGP encrypted data
!:mime application/pgp-encrypted
# Generic ASCII-armor test, kept for reference but disabled.  Its MIME
# annotation is commented out together with the test: a "!:mime" line applies
# to the preceding active entry, so leaving it enabled would label the binary
# 0xa600 entry above as text/PGP.
#0 string -----BEGIN\040PGP text/PGP armored data
#!:mime text/PGP # encoding: armored data
#>15 string PUBLIC\040KEY\040BLOCK- public key block
#>15 string MESSAGE- message
#>15 string SIGNED\040MESSAGE- signed message
#>15 string PGP\040SIGNATURE- signature
2 string ---BEGIN\ PGP\ PUBLIC\ KEY\ BLOCK- PGP public key block
!:mime application/pgp-keys
0 string -----BEGIN\040PGP\40MESSAGE- PGP message
!:mime application/pgp
0 string -----BEGIN\040PGP\40SIGNATURE- PGP signature
!:mime application/pgp-signature

7
magic/pkgadd Normal file
View file

@ -0,0 +1,7 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File$
# pkgadd: file(1) magic for SysV R4 PKG Datastreams
#
0 string #\ PaCkAgE\ DaTaStReAm pkg Datastream (SVR4)
!:mime application/x-svr4-package

14
magic/printer Normal file
View file

@ -0,0 +1,14 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File: printer,v 1.24 2011/05/08 16:34:51 christos Exp $
# printer: file(1) magic for printer-formatted files
#
# PostScript, updated by Daniel Quinlan (quinlan@yggdrasil.com)
0 string %! PostScript document text
!:mime application/postscript
!:apple ASPSTEXT
# Some PCs have the annoying habit of adding a ^D as a document separator
0 string \004%! PostScript document text
!:mime application/postscript
!:apple ASPSTEXT

46
magic/python Normal file
View file

@ -0,0 +1,46 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File: python,v 1.21 2012/06/21 01:12:51 christos Exp $
# python: file(1) magic for python
#
0 search/1/w #!\ /usr/bin/python Python script text executable
!:mime text/x-python
0 search/1/w #!\ /usr/local/bin/python Python script text executable
!:mime text/x-python
0 search/1 #!/usr/bin/env\ python Python script text executable
!:mime text/x-python
0 search/1 #!\ /usr/bin/env\ python Python script text executable
!:mime text/x-python
# from module.submodule import func1, func2
0 regex \^from\\s+(\\w|\\.)+\\s+import.*$ Python script text executable
!:mime text/x-python
# def __init__ (self, ...):
0 search/4096 def\ __init__
>&0 search/64 self Python script text executable
!:mime text/x-python
# comments
0 search/4096 '''
>&0 regex .*'''$ Python script text executable
!:mime text/x-python
0 search/4096 """
>&0 regex .*"""$ Python script text executable
!:mime text/x-python
# try:
# except: or finally:
# block
0 search/4096 try:
>&0 regex \^\\s*except.*: Python script text executable
!:mime text/x-python
>&0 search/4096 finally: Python script text executable
!:mime text/x-python
# def name(args, args):
0 regex \^(\ |\\t)*def\ +[a-zA-Z]+
>&0 regex \ *\\(([a-zA-Z]|,|\ )*\\):$ Python script text executable
!:mime text/x-python

36
magic/riff Normal file
View file

@ -0,0 +1,36 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File: riff,v 1.22 2011/09/06 11:00:06 christos Exp $
# riff: file(1) magic for RIFF format
# See
#
# http://www.seanet.com/users/matts/riffmci/riffmci.htm
#
# AVI section extended by Patrik Radman <patrik+file-magic@iki.fi>
#
0 string RIFF RIFF (little-endian) data
# Microsoft WAVE format (*.wav)
>8 string WAVE \b, WAVE audio
!:mime audio/x-wav
# Corel Draw Picture
>8 string CDRA \b, Corel Draw Picture
!:mime image/x-coreldraw
# AVI == Audio Video Interleave
>8 string AVI\040 \b, AVI
!:mime video/x-msvideo
#------------------------------------------------------------------------------
# Sony Wave64
# see http://www.vcs.de/fileadmin/user_upload/MBS/PDF/Whitepaper/Informations_about_Sony_Wave64.pdf
# 128 bit RIFF-GUID { 66666972-912E-11CF-A5D6-28DB04C10000 } in little-endian
0 string riff\x2E\x91\xCF\x11\xA5\xD6\x28\xDB\x04\xC1\x00\x00 Sony Wave64 RIFF data
# 128 bit + total file size (64 bits) so 24 bytes
# then WAVE-GUID { 65766177-ACF3-11D3-8CD1-00C04F8EDB8A }
>24 string wave\xF3\xAC\xD3\x11\x8C\xD1\x00\xC0\x4F\x8E\xDB\x8A \b, WAVE 64 audio
!:mime audio/x-w64
#------------------------------------------------------------------------------
# MBWF/RF64
# see EBU TECH 3306 http://tech.ebu.ch/docs/tech/tech3306-2009.pdf
0 string RF64\xff\xff\xff\xffWAVEds64 MBWF/RF64 audio
!:mime audio/x-wav

12
magic/rpm Normal file
View file

@ -0,0 +1,12 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File: rpm,v 1.11 2011/06/14 12:47:41 christos Exp $
#
# RPM: file(1) magic for Red Hat Packages Erik Troan (ewt@redhat.com)
#
0 belong 0xedabeedb RPM
!:mime application/x-rpm
#delta RPM Daniel Novotny (dnovotny@redhat.com)
0 string drpm Delta RPM
!:mime application/x-rpm

9
magic/rtf Normal file
View file

@ -0,0 +1,9 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File$
# rtf: file(1) magic for Rich Text Format (RTF)
#
# Duncan P. Simpson, D.P.Simpson@dcs.warwick.ac.uk
#
0 string {\\rtf Rich Text Format data,
!:mime text/rtf

28
magic/ruby Normal file
View file

@ -0,0 +1,28 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File: ruby,v 1.4 2010/07/08 20:24:13 christos Exp $
# ruby: file(1) magic for Ruby scripting language
# URL: http://www.ruby-lang.org/
# From: Reuben Thomas <rrt@sc3d.org>
# Ruby scripts
0 search/1/w #!\ /usr/bin/ruby Ruby script text executable
!:mime text/x-ruby
0 search/1/w #!\ /usr/local/bin/ruby Ruby script text executable
!:mime text/x-ruby
0 search/1 #!/usr/bin/env\ ruby Ruby script text executable
!:mime text/x-ruby
0 search/1 #!\ /usr/bin/env\ ruby Ruby script text executable
!:mime text/x-ruby
# What looks like ruby, but does not have a shebang
# (modules and such)
# From: Lubomir Rintel <lkundrak@v3.sk>
0 regex \^[\ \t]*require[\ \t]'[A-Za-z_/]+'
>0 regex include\ [A-Z]|def\ [a-z]|\ do$
>>0 regex \^[\ \t]*end([\ \t]*[;#].*)?$ Ruby script text
!:mime text/x-ruby
0 regex \^[\ \t]*(class|module)[\ \t][A-Z]
>0 regex (modul|includ)e\ [A-Z]|def\ [a-z]
>>0 regex \^[\ \t]*end([\ \t]*[;#].*)?$ Ruby module source text
!:mime text/x-ruby

7
magic/sc Normal file
View file

@ -0,0 +1,7 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File$
# sc: file(1) magic for "sc" spreadsheet
#
38 string Spreadsheet sc spreadsheet file
!:mime application/x-sc

82
magic/sgml Normal file
View file

@ -0,0 +1,82 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File: sgml,v 1.28 2012/04/28 21:20:26 christos Exp $
# Type: SVG Vectorial Graphics
# From: Noel Torres <tecnico@ejerciciosresueltos.com>
0 string \<?xml\ version="
>15 string >\0
>>19 search/4096 \<svg SVG Scalable Vector Graphics image
!:mime image/svg+xml
>>19 search/4096 \<gnc-v2 GnuCash file
!:mime application/x-gnucash
# Sitemap file
0 string/t \<?xml\ version="
>15 string >\0
>>19 search/4096 \<urlset XML Sitemap document text
!:mime application/xml-sitemap
# xhtml
0 string/t \<?xml\ version="
>15 string >\0
>>19 search/4096/cWbt \<!doctype\ html XHTML document text
!:mime text/html
0 string/t \<?xml\ version='
>15 string >\0
>>19 search/4096/cWbt \<!doctype\ html XHTML document text
!:mime text/html
0 string/t \<?xml\ version="
>15 string >\0
>>19 search/4096/cWbt \<html broken XHTML document text
!:mime text/html
#------------------------------------------------------------------------------
# sgml: file(1) magic for Standard Generalized Markup Language
# HyperText Markup Language (HTML) is an SGML document type,
# from Daniel Quinlan (quinlan@yggdrasil.com)
# adapted to string extensions by Anthon van der Neut <anthon@mnt.org>
0 search/4096/cWt \<!doctype\ html HTML document text
!:mime text/html
!:strength + 5
0 search/4096/cwt \<head HTML document text
!:mime text/html
!:strength + 5
0 search/4096/cwt \<title HTML document text
!:mime text/html
!:strength + 5
0 search/4096/cwt \<html HTML document text
!:mime text/html
!:strength + 5
0 search/4096/cwt \<script HTML document text
!:mime text/html
!:strength + 5
0 search/4096/cwt \<style HTML document text
!:mime text/html
!:strength + 5
0 search/4096/cwt \<table HTML document text
!:mime text/html
!:strength + 5
0 search/4096/cwt \<a\ href= HTML document text
!:mime text/html
!:strength + 5
# Extensible markup language (XML), a subset of SGML
# from Marc Prud'hommeaux (marc@apocalypse.org)
0 search/1/cwt \<?xml XML document text
!:mime application/xml
!:strength + 5
0 string/t \<?xml\ version\ " XML
!:mime application/xml
!:strength + 5
0 string/t \<?xml\ version=" XML
!:mime application/xml
!:strength + 5
0 string \<?xml\ version=' XML
!:mime application/xml
!:strength + 5
0 search/1/wbt \<?xml XML document text
!:mime application/xml
!:strength - 10
0 search/1/wt \<?XML broken XML document text
!:mime application/xml
!:strength - 10

17
magic/sniffer Normal file
View file

@ -0,0 +1,17 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# sniffer: file(1) magic for packet capture files
#
# From: guy@alum.mit.edu (Guy Harris)
#
#
# "libpcap" capture files.
# (We call them "tcpdump capture file(s)" for now, as "tcpdump" is
# the main program that uses that format, but there are other programs
# that use "libpcap", or that use the same capture file format.)
#
0 ubelong 0xa1b2c3d4 tcpdump capture file (big-endian)
!:mime application/vnd.tcpdump.pcap
0 ulelong 0xa1b2c3d4 tcpdump capture file (little-endian)
!:mime application/vnd.tcpdump.pcap

23
magic/tcl Normal file
View file

@ -0,0 +1,23 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# file: file(1) magic for Tcl scripting language
# URL: http://www.tcl.tk/
# From: gustaf neumann
# Tcl scripts
0 search/1/w #!\ /usr/bin/tcl Tcl script text executable
!:mime text/x-tcl
0 search/1/w #!\ /usr/local/bin/tcl Tcl script text executable
!:mime text/x-tcl
0 search/1 #!/usr/bin/env\ tcl Tcl script text executable
!:mime text/x-tcl
0 search/1 #!\ /usr/bin/env\ tcl Tcl script text executable
!:mime text/x-tcl
0 search/1/w #!\ /usr/bin/wish Tcl/Tk script text executable
!:mime text/x-tcl
0 search/1/w #!\ /usr/local/bin/wish Tcl/Tk script text executable
!:mime text/x-tcl
0 search/1 #!/usr/bin/env\ wish Tcl/Tk script text executable
!:mime text/x-tcl
0 search/1 #!\ /usr/bin/env\ wish Tcl/Tk script text executable
!:mime text/x-tcl

56
magic/tex Normal file
View file

@ -0,0 +1,56 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File: tex,v 1.17 2010/09/20 19:19:17 rrt Exp $
# tex: file(1) magic for TeX files
#
# XXX - needs byte-endian stuff (big-endian and little-endian DVI?)
#
# From <conklin@talisman.kaleida.com>
# Although we may know the offset of certain text fields in TeX DVI
# and font files, we can't use them reliably because they are not
# zero terminated. [but we do anyway, christos]
0 string \367\002 TeX DVI file
!:mime application/x-dvi
# There is no way to detect TeX Font Metric (*.tfm) files without
# breaking them apart and reading the data. The following patterns
# match most *.tfm files generated by METAFONT or afm2tfm.
2 string \000\021 TeX font metric data
!:mime application/x-tex-tfm
2 string \000\022 TeX font metric data
!:mime application/x-tex-tfm
# Texinfo and GNU Info, from Daniel Quinlan (quinlan@yggdrasil.com)
0 search/1 \\input\ texinfo Texinfo source text
!:mime text/x-texinfo
0 search/1 This\ is\ Info\ file GNU Info text
!:mime text/x-info
# TeX documents, from Daniel Quinlan (quinlan@yggdrasil.com)
0 search/4096 \\input TeX document text
!:mime text/x-tex
!:strength + 15
0 search/4096 \\section LaTeX document text
!:mime text/x-tex
!:strength + 18
0 search/4096 \\setlength LaTeX document text
!:mime text/x-tex
!:strength + 15
0 search/4096 \\documentstyle LaTeX document text
!:mime text/x-tex
!:strength + 18
0 search/4096 \\chapter LaTeX document text
!:mime text/x-tex
!:strength + 18
0 search/4096 \\documentclass LaTeX 2e document text
!:mime text/x-tex
!:strength + 15
0 search/4096 \\relax LaTeX auxiliary file
!:mime text/x-tex
!:strength + 15
0 search/4096 \\contentsline LaTeX table of contents
!:mime text/x-tex
!:strength + 15
0 search/4096 %\ -*-latex-*- LaTeX document text
!:mime text/x-tex

22
magic/troff Normal file
View file

@ -0,0 +1,22 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File$
# troff: file(1) magic for *roff
#
# updated by Daniel Quinlan (quinlan@yggdrasil.com)
# troff input
0 search/1 .\\" troff or preprocessor input text
!:mime text/troff
0 search/1 '\\" troff or preprocessor input text
!:mime text/troff
0 search/1 '.\\" troff or preprocessor input text
!:mime text/troff
0 search/1 \\" troff or preprocessor input text
!:mime text/troff
0 search/1 ''' troff or preprocessor input text
!:mime text/troff
0 regex/20 \^\\.[A-Za-z0-9][A-Za-z0-9][\ \t] troff or preprocessor input text
!:mime text/troff
0 regex/20 \^\\.[A-Za-z0-9][A-Za-z0-9]$ troff or preprocessor input text
!:mime text/troff

26
magic/vorbis Normal file
View file

@ -0,0 +1,26 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File$
# vorbis: file(1) magic for Ogg/Vorbis files
#
# From Felix von Leitner <leitner@fefe.de>
# Extended by Beni Cherniavsky <cben@crosswinds.net>
# Further extended by Greg Wooledge <greg@wooledge.org>
#
# Most (everything but the number of channels and bitrate) is commented
# out with `##' as it's not interesting to the average user. The most
# probable things advanced users would want to uncomment are probably
# the number of comments and the encoder version.
#
# FIXME: The first match has been made a search, so that it can skip
# over prepended ID3 tags. This will work for MIME type detection, but
# won't work for detecting other properties of the file (they all need
# to be made relative to the search). In any case, if the file has ID3
# tags, the ID3 information will be printed, not the Ogg information,
# so until that's fixed, this doesn't matter.
# FIXME[2]: Disable the above for now, since search assumes text mode.
#
# --- Ogg Framing ---
#0 search/1000 OggS Ogg data
0 string OggS Ogg data
!:mime application/ogg

14
magic/warc Normal file
View file

@ -0,0 +1,14 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File: warc,v 1.2 2009/09/19 16:28:13 christos Exp $
# warc: file(1) magic for WARC files
0 string WARC/ WARC Archive
>5 string x version %.4s
!:mime application/warc
#------------------------------------------------------------------------------
# Arc File Format from Internet Archive
# see http://www.archive.org/web/researcher/ArcFileFormat.php
0 string filedesc:// Internet Archive File
!:mime application/x-ia-arc

19
magic/windows Normal file
View file

@ -0,0 +1,19 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File: windows,v 1.4 2009/09/19 16:28:13 christos Exp $
# windows: file(1) magic for Microsoft Windows
#
# This file is mainly reserved for files where programs
# using them are run almost always on MS Windows 3.x or
# above, or files only used exclusively in Windows OS,
# where there is no better category to allocate for.
# For example, even though WinZIP runs almost exclusively on
# Windows, it is better to treat its files as "archive" instead.
# For format usable in DOS, such as generic executable
# format, please specify under "msdos" file.
#
# From: Pal Tamas <folti@balabit.hu>
# Autorun File
0 string/c [autorun]\r\n Microsoft Windows Autorun file.
!:mime application/x-setupscript

43
magic/wordprocessors Normal file
View file

@ -0,0 +1,43 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File: wordprocessors,v 1.16 2012/10/29 17:36:49 christos Exp $
# wordprocessors: file(1) magic for word processors.
#
# Hangul (Korean) Word Processor File
# From: Won-Kyu Park <wkpark@kldp.org>
512 string R\0o\0o\0t\0 Hangul (Korean) Word Processor File 2000
!:mime application/x-hwp
# Quark Express from http://www.garykessler.net/library/file_sigs.html
2 string MMXPR3 Motorola Quark Express Document (English)
!:mime application/x-quark-xpress-3
#------------------------------------------------------------------------------
# ichitaro456: file(1) magic for Just System Word Processor Ichitaro
#
# Contributor kenzo-:
# Reversed-engineered JS Ichitaro magic numbers
#
0 string DOC
>43 byte 0x14 Just System Word Processor Ichitaro v4
!:mime application/x-ichitaro4
0 string DOC
>43 byte 0x15 Just System Word Processor Ichitaro v5
!:mime application/x-ichitaro5
0 string DOC
>43 byte 0x16 Just System Word Processor Ichitaro v6
!:mime application/x-ichitaro6
# Type: Freemind mindmap documents
# From: Jamie Thompson <debian-bugs@jamie-thompson.co.uk>
0 string/w \<map\ version Freemind document
!:mime application/x-freemind
# Type: Scribus
# From: Werner Fink <werner@suse.de>
0 string \<SCRIBUSUTF8NEW\ Version Scribus Document
!:mime application/x-scribus

11
magic/xwindows Normal file
View file

@ -0,0 +1,11 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File: xwindows,v 1.7 2011/05/03 01:44:17 christos Exp $
# xwindows: file(1) magic for various X/Window system file formats.
# Xcursor data
# X11 mouse cursor format defined in libXcursor, see
# http://www.x.org/archive/X11R6.8.1/doc/Xcursor.3.html
# http://cgit.freedesktop.org/xorg/lib/libXcursor/tree/include/X11/Xcursor/Xcursor.h
0 string Xcur Xcursor data
!:mime image/x-xcursor

View file

@ -12,13 +12,13 @@ export {
LOG LOG
}; };
## A structure which represents a desired file analysis action to take. ## A structure which represents a desired type of file analysis.
type ActionArgs: record { type AnalyzerArgs: record {
## The type of action. ## The type of analysis.
act: Action; tag: Analyzer;
## The local filename to which to write an extracted file. Must be ## The local filename to which to write an extracted file. Must be
## set when *act* is :bro:see:`FileAnalysis::ACTION_EXTRACT`. ## set when *tag* is :bro:see:`FileAnalysis::ANALYZER_EXTRACT`.
extract_filename: string &optional; extract_filename: string &optional;
## An event which will be generated for all new file contents, ## An event which will be generated for all new file contents,
@ -46,6 +46,10 @@ export {
## path which was read, or some other input source. ## path which was read, or some other input source.
source: string &log &optional; source: string &log &optional;
## If the source of this file is a network connection, this field
## may be set to indicate the directionality.
is_orig: bool &log &optional;
## The time at which the last activity for the file was seen. ## The time at which the last activity for the file was seen.
last_active: time &log; last_active: time &log;
@ -60,8 +64,7 @@ export {
missing_bytes: count &log &default=0; missing_bytes: count &log &default=0;
## The number of not all-in-sequence bytes in the file stream that ## The number of not all-in-sequence bytes in the file stream that
## were delivered to file actions/analyzers due to reassembly buffer ## were delivered to file analyzers due to reassembly buffer overflow.
## overflow.
overflow_bytes: count &log &default=0; overflow_bytes: count &log &default=0;
## The amount of time between receiving new data for this file that ## The amount of time between receiving new data for this file that
@ -72,11 +75,6 @@ export {
## inspection in *bof_buffer* field. ## inspection in *bof_buffer* field.
bof_buffer_size: count &log &optional; bof_buffer_size: count &log &optional;
## A file type provided by libmagic against the *bof_buffer*, or
## in the cases where no buffering of the beginning of file occurs,
## an initial guess of the file type based on the first data seen.
file_type: string &log &optional;
## A mime type provided by libmagic against the *bof_buffer*, or ## A mime type provided by libmagic against the *bof_buffer*, or
## in the cases where no buffering of the beginning of file occurs, ## in the cases where no buffering of the beginning of file occurs,
## an initial guess of the mime type based on the first data seen. ## an initial guess of the mime type based on the first data seen.
@ -88,10 +86,10 @@ export {
## Connection UIDS over which the file was transferred. ## Connection UIDS over which the file was transferred.
conn_uids: set[string] &log; conn_uids: set[string] &log;
## A set of action types taken during the file analysis. ## A set of analysis types done during the file analysis.
actions_taken: set[Action] &log; analyzers: set[Analyzer] &log;
## Local filenames of file extraction actions. ## Local filenames of extracted files.
extracted_files: set[string] &log; extracted_files: set[string] &log;
## An MD5 digest of the file contents. ## An MD5 digest of the file contents.
@ -120,10 +118,23 @@ export {
## generate two handles that would hash to the same file id. ## generate two handles that would hash to the same file id.
const salt = "I recommend changing this." &redef; const salt = "I recommend changing this." &redef;
## Sets the *timeout_interval* field of :bro:see:`fa_file`, which is
## used to determine the length of inactivity that is allowed for a file
## before internal state related to it is cleaned up.
##
## f: the file.
##
## t: the amount of time the file can remain inactive before discarding.
##
## Returns: true if the timeout interval was set, or false if analysis
## for the *id* isn't currently active.
global set_timeout_interval: function(f: fa_file, t: interval): bool;
## Postpones the timeout of file analysis for a given file. ## Postpones the timeout of file analysis for a given file.
## When used within a :bro:see:`file_timeout` handler for, the analysis ## When used within a :bro:see:`file_timeout` handler for, the analysis
## the analysis will delay timing out for the period of time indicated by ## the analysis will delay timing out for the period of time indicated by
## the *timeout_interval* field of :bro:see:`fa_file`. ## the *timeout_interval* field of :bro:see:`fa_file`, which can be set
## with :bro:see:`FileAnalysis::set_timeout_interval`.
## ##
## f: the file. ## f: the file.
## ##
@ -131,26 +142,26 @@ export {
## for the *id* isn't currently active. ## for the *id* isn't currently active.
global postpone_timeout: function(f: fa_file): bool; global postpone_timeout: function(f: fa_file): bool;
## Adds an action to the analysis of a given file. ## Adds an analyzer to the analysis of a given file.
## ##
## f: the file. ## f: the file.
## ##
## args: the action type to add along with any arguments it takes. ## args: the analyzer type to add along with any arguments it takes.
## ##
## Returns: true if the action will be added, or false if analysis ## Returns: true if the analyzer will be added, or false if analysis
## for the *id* isn't currently active or the *args* ## for the *id* isn't currently active or the *args*
## were invalid for the action type. ## were invalid for the analyzer type.
global add_action: function(f: fa_file, args: ActionArgs): bool; global add_analyzer: function(f: fa_file, args: AnalyzerArgs): bool;
## Removes an action from the analysis of a given file. ## Removes an analyzer from the analysis of a given file.
## ##
## f: the file. ## f: the file.
## ##
## args: the action (type and args) to remove. ## args: the analyzer (type and args) to remove.
## ##
## Returns: true if the action will be removed, or false if analysis ## Returns: true if the analyzer will be removed, or false if analysis
## for the *id* isn't currently active. ## for the *id* isn't currently active.
global remove_action: function(f: fa_file, args: ActionArgs): bool; global remove_analyzer: function(f: fa_file, args: AnalyzerArgs): bool;
## Stops/ignores any further analysis of a given file. ## Stops/ignores any further analysis of a given file.
## ##
@ -229,6 +240,7 @@ function set_info(f: fa_file)
f$info$id = f$id; f$info$id = f$id;
if ( f?$parent_id ) f$info$parent_id = f$parent_id; if ( f?$parent_id ) f$info$parent_id = f$parent_id;
if ( f?$source ) f$info$source = f$source; if ( f?$source ) f$info$source = f$source;
if ( f?$is_orig ) f$info$is_orig = f$is_orig;
f$info$last_active = f$last_active; f$info$last_active = f$last_active;
f$info$seen_bytes = f$seen_bytes; f$info$seen_bytes = f$seen_bytes;
if ( f?$total_bytes ) f$info$total_bytes = f$total_bytes; if ( f?$total_bytes ) f$info$total_bytes = f$total_bytes;
@ -236,34 +248,38 @@ function set_info(f: fa_file)
f$info$overflow_bytes = f$overflow_bytes; f$info$overflow_bytes = f$overflow_bytes;
f$info$timeout_interval = f$timeout_interval; f$info$timeout_interval = f$timeout_interval;
f$info$bof_buffer_size = f$bof_buffer_size; f$info$bof_buffer_size = f$bof_buffer_size;
if ( f?$file_type ) f$info$file_type = f$file_type;
if ( f?$mime_type ) f$info$mime_type = f$mime_type; if ( f?$mime_type ) f$info$mime_type = f$mime_type;
if ( f?$conns ) if ( f?$conns )
for ( cid in f$conns ) for ( cid in f$conns )
add f$info$conn_uids[f$conns[cid]$uid]; add f$info$conn_uids[f$conns[cid]$uid];
} }
function set_timeout_interval(f: fa_file, t: interval): bool
{
return __set_timeout_interval(f$id, t);
}
function postpone_timeout(f: fa_file): bool function postpone_timeout(f: fa_file): bool
{ {
return __postpone_timeout(f$id); return __postpone_timeout(f$id);
} }
function add_action(f: fa_file, args: ActionArgs): bool function add_analyzer(f: fa_file, args: AnalyzerArgs): bool
{ {
if ( ! __add_action(f$id, args) ) return F; if ( ! __add_analyzer(f$id, args) ) return F;
set_info(f); set_info(f);
add f$info$actions_taken[args$act]; add f$info$analyzers[args$tag];
if ( args$act == FileAnalysis::ACTION_EXTRACT ) if ( args$tag == FileAnalysis::ANALYZER_EXTRACT )
add f$info$extracted_files[args$extract_filename]; add f$info$extracted_files[args$extract_filename];
return T; return T;
} }
function remove_action(f: fa_file, args: ActionArgs): bool function remove_analyzer(f: fa_file, args: AnalyzerArgs): bool
{ {
return __remove_action(f$id, args); return __remove_analyzer(f$id, args);
} }
function stop(f: fa_file): bool function stop(f: fa_file): bool

View file

@ -316,7 +316,12 @@ type connection: record {
tunnel: EncapsulatingConnVector &optional; tunnel: EncapsulatingConnVector &optional;
}; };
## Default amount of time a file can be inactive before the file analysis
## gives up and discards any internal state related to the file.
const default_file_timeout_interval: interval = 2 mins &redef; const default_file_timeout_interval: interval = 2 mins &redef;
## Default amount of bytes that file analysis will buffer before raising
## :bro:see:`file_new`.
const default_file_bof_buffer_size: count = 1024 &redef; const default_file_bof_buffer_size: count = 1024 &redef;
## A file that Bro is analyzing. This is Bro's type for describing the basic ## A file that Bro is analyzing. This is Bro's type for describing the basic
@ -336,6 +341,10 @@ type fa_file: record {
## path which was read, or some other input source. ## path which was read, or some other input source.
source: string &optional; source: string &optional;
## If the source of this file is a network connection, this field
## may be set to indicate the directionality.
is_orig: bool &optional;
## The set of connections over which the file was transferred. ## The set of connections over which the file was transferred.
conns: table[conn_id] of connection &optional; conns: table[conn_id] of connection &optional;
@ -353,8 +362,7 @@ type fa_file: record {
missing_bytes: count &default=0; missing_bytes: count &default=0;
## The number of not all-in-sequence bytes in the file stream that ## The number of not all-in-sequence bytes in the file stream that
## were delivered to file actions/analyzers due to reassembly buffer ## were delivered to file analyzers due to reassembly buffer overflow.
## overflow.
overflow_bytes: count &default=0; overflow_bytes: count &default=0;
## The amount of time between receiving new data for this file that ## The amount of time between receiving new data for this file that
@ -369,11 +377,6 @@ type fa_file: record {
## This is also the buffer that's used for file/mime type detection. ## This is also the buffer that's used for file/mime type detection.
bof_buffer: string &optional; bof_buffer: string &optional;
## A file type provided by libmagic against the *bof_buffer*, or
## in the cases where no buffering of the beginning of file occurs,
## an initial guess of the file type based on the first data seen.
file_type: string &optional;
## A mime type provided by libmagic against the *bof_buffer*, or ## A mime type provided by libmagic against the *bof_buffer*, or
## in the cases where no buffering of the beginning of file occurs, ## in the cases where no buffering of the beginning of file occurs,
## an initial guess of the mime type based on the first data seen. ## an initial guess of the mime type based on the first data seen.

View file

@ -11,7 +11,7 @@ export {
function get_handle_string(c: connection): string function get_handle_string(c: connection): string
{ {
return fmt("%s %s %s", ANALYZER_FTP_DATA, c$start_time, id_string(c$id)); return cat(ANALYZER_FTP_DATA, " ", c$start_time, " ", id_string(c$id));
} }
function get_file_handle(c: connection, is_orig: bool): string function get_file_handle(c: connection, is_orig: bool): string

View file

@ -38,8 +38,8 @@ event file_new(f: fa_file) &priority=5
if ( f?$mime_type && extract_file_types in f$mime_type ) if ( f?$mime_type && extract_file_types in f$mime_type )
{ {
FileAnalysis::add_action(f, [$act=FileAnalysis::ACTION_EXTRACT, FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_EXTRACT,
$extract_filename=get_extraction_name(f)]); $extract_filename=get_extraction_name(f)]);
return; return;
} }
@ -55,8 +55,8 @@ event file_new(f: fa_file) &priority=5
if ( ! s$extract_file ) next; if ( ! s$extract_file ) next;
FileAnalysis::add_action(f, [$act=FileAnalysis::ACTION_EXTRACT, FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_EXTRACT,
$extract_filename=get_extraction_name(f)]); $extract_filename=get_extraction_name(f)]);
return; return;
} }
} }

View file

@ -56,8 +56,6 @@ export {
## Libmagic "sniffed" file type if the command indicates a file transfer. ## Libmagic "sniffed" file type if the command indicates a file transfer.
mime_type: string &log &optional; mime_type: string &log &optional;
## Libmagic "sniffed" file description if the command indicates a file transfer.
mime_desc: string &log &optional;
## Size of the file if the command indicates a file transfer. ## Size of the file if the command indicates a file transfer.
file_size: count &log &optional; file_size: count &log &optional;
@ -205,7 +203,6 @@ function ftp_message(s: Info)
# and may not be used in all commands so they need reset to "blank" # and may not be used in all commands so they need reset to "blank"
# values after logging. # values after logging.
delete s$mime_type; delete s$mime_type;
delete s$mime_desc;
delete s$file_size; delete s$file_size;
# Same with data channel. # Same with data channel.
delete s$data_channel; delete s$data_channel;
@ -353,7 +350,6 @@ event file_transferred(c: connection, prefix: string, descr: string,
{ {
local s = ftp_data_expected[id$resp_h, id$resp_p]; local s = ftp_data_expected[id$resp_h, id$resp_p];
s$mime_type = split1(mime_type, /;/)[1]; s$mime_type = split1(mime_type, /;/)[1];
s$mime_desc = descr;
} }
} }

View file

@ -15,11 +15,11 @@ function get_file_handle(c: connection, is_orig: bool): string
if ( ! c?$http ) return ""; if ( ! c?$http ) return "";
if ( c$http$range_request ) if ( c$http$range_request )
return fmt("%s %s %s %s", ANALYZER_HTTP, is_orig, c$id$orig_h, return cat(ANALYZER_HTTP, " ", is_orig, " ", c$id$orig_h, " ",
build_url(c$http)); build_url(c$http));
return fmt("%s %s %s %s %s", ANALYZER_HTTP, c$start_time, is_orig, return cat(ANALYZER_HTTP, " ", c$start_time, " ", is_orig, " ",
c$http$trans_depth, id_string(c$id)); c$http$trans_depth, " ", id_string(c$id));
} }
module GLOBAL; module GLOBAL;

View file

@ -44,8 +44,8 @@ event file_new(f: fa_file) &priority=5
if ( f?$mime_type && extract_file_types in f$mime_type ) if ( f?$mime_type && extract_file_types in f$mime_type )
{ {
fname = get_extraction_name(f); fname = get_extraction_name(f);
FileAnalysis::add_action(f, [$act=FileAnalysis::ACTION_EXTRACT, FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_EXTRACT,
$extract_filename=fname]); $extract_filename=fname]);
for ( cid in f$conns ) for ( cid in f$conns )
{ {
@ -68,8 +68,8 @@ event file_new(f: fa_file) &priority=5
if ( ! c$http$extract_file ) next; if ( ! c$http$extract_file ) next;
fname = get_extraction_name(f); fname = get_extraction_name(f);
FileAnalysis::add_action(f, [$act=FileAnalysis::ACTION_EXTRACT, FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_EXTRACT,
$extract_filename=fname]); $extract_filename=fname]);
extracting = T; extracting = T;
break; break;
} }

View file

@ -30,7 +30,7 @@ event file_new(f: fa_file) &priority=5
if ( f?$mime_type && generate_md5 in f$mime_type ) if ( f?$mime_type && generate_md5 in f$mime_type )
{ {
FileAnalysis::add_action(f, [$act=FileAnalysis::ACTION_MD5]); FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_MD5]);
return; return;
} }
@ -44,7 +44,7 @@ event file_new(f: fa_file) &priority=5
if ( ! c$http$calc_md5 ) next; if ( ! c$http$calc_md5 ) next;
FileAnalysis::add_action(f, [$act=FileAnalysis::ACTION_MD5]); FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_MD5]);
return; return;
} }
} }

View file

@ -49,7 +49,7 @@ event file_new(f: fa_file) &priority=5
c$http$mime_type = f$mime_type; c$http$mime_type = f$mime_type;
local mime_str: string = split1(f$mime_type, /;/)[1]; local mime_str: string = c$http$mime_type;
if ( mime_str !in mime_types_extensions ) next; if ( mime_str !in mime_types_extensions ) next;
if ( ! c$http?$uri ) next; if ( ! c$http?$uri ) next;
@ -66,23 +66,18 @@ event file_new(f: fa_file) &priority=5
} }
} }
event file_over_new_connection(f: fa_file) &priority=5 event file_over_new_connection(f: fa_file, c: connection) &priority=5
{ {
if ( ! f?$source ) return; if ( ! f?$source ) return;
if ( f$source != "HTTP" ) return; if ( f$source != "HTTP" ) return;
if ( ! f?$mime_type ) return; if ( ! f?$mime_type ) return;
if ( ! f?$conns ) return; if ( ! c?$http ) return;
# Spread the mime around (e.g. for partial content, file_type event only # Spread the mime around (e.g. for partial content, file_type event only
# happens once for the first connection, but if there's subsequent # happens once for the first connection, but if there's subsequent
# connections to transfer the same file, they'll be lacking the mime_type # connections to transfer the same file, they'll be lacking the mime_type
# field if we don't do this). # field if we don't do this).
for ( cid in f$conns ) c$http$mime_type = f$mime_type;
{
local c: connection = f$conns[cid];
if ( ! c?$http ) next;
c$http$mime_type = f$mime_type;
}
} }
# Tracks byte-range request / partial content response mime types, indexed # Tracks byte-range request / partial content response mime types, indexed

View file

@ -101,8 +101,8 @@ event file_new(f: fa_file) &priority=5
if ( f?$mime_type && extract_file_types in f$mime_type ) if ( f?$mime_type && extract_file_types in f$mime_type )
{ {
fname = get_extraction_name(f); fname = get_extraction_name(f);
FileAnalysis::add_action(f, [$act=FileAnalysis::ACTION_EXTRACT, FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_EXTRACT,
$extract_filename=fname]); $extract_filename=fname]);
set_dcc_extraction_file(f, fname); set_dcc_extraction_file(f, fname);
return; return;
} }
@ -120,8 +120,8 @@ event file_new(f: fa_file) &priority=5
if ( ! s$extract_file ) next; if ( ! s$extract_file ) next;
fname = get_extraction_name(f); fname = get_extraction_name(f);
FileAnalysis::add_action(f, [$act=FileAnalysis::ACTION_EXTRACT, FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_EXTRACT,
$extract_filename=fname]); $extract_filename=fname]);
s$extraction_file = fname; s$extraction_file = fname;
return; return;
} }

View file

@ -12,7 +12,7 @@ export {
function get_file_handle(c: connection, is_orig: bool): string function get_file_handle(c: connection, is_orig: bool): string
{ {
if ( is_orig ) return ""; if ( is_orig ) return "";
return fmt("%s %s %s", ANALYZER_IRC_DATA, c$start_time, id_string(c$id)); return cat(ANALYZER_IRC_DATA, " ", c$start_time, " ", id_string(c$id));
} }
module GLOBAL; module GLOBAL;

View file

@ -123,8 +123,9 @@ event file_new(f: fa_file) &priority=5
if ( ! extracting ) if ( ! extracting )
{ {
fname = get_extraction_name(f); fname = get_extraction_name(f);
FileAnalysis::add_action(f, [$act=FileAnalysis::ACTION_EXTRACT, FileAnalysis::add_analyzer(f,
$extract_filename=fname]); [$tag=FileAnalysis::ANALYZER_EXTRACT,
$extract_filename=fname]);
extracting = T; extracting = T;
++extract_count; ++extract_count;
} }
@ -133,7 +134,7 @@ event file_new(f: fa_file) &priority=5
} }
if ( c$smtp$current_entity$calc_md5 ) if ( c$smtp$current_entity$calc_md5 )
FileAnalysis::add_action(f, [$act=FileAnalysis::ACTION_MD5]); FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_MD5]);
} }
} }
@ -141,12 +142,12 @@ function check_extract_by_type(f: fa_file)
{ {
if ( extract_file_types !in f$mime_type ) return; if ( extract_file_types !in f$mime_type ) return;
if ( f?$info && FileAnalysis::ACTION_EXTRACT in f$info$actions_taken ) if ( f?$info && FileAnalysis::ANALYZER_EXTRACT in f$info$analyzers )
return; return;
local fname: string = get_extraction_name(f); local fname: string = get_extraction_name(f);
FileAnalysis::add_action(f, [$act=FileAnalysis::ACTION_EXTRACT, FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_EXTRACT,
$extract_filename=fname]); $extract_filename=fname]);
if ( ! f?$conns ) return; if ( ! f?$conns ) return;
@ -163,7 +164,7 @@ function check_md5_by_type(f: fa_file)
if ( never_calc_md5 ) return; if ( never_calc_md5 ) return;
if ( generate_md5 !in f$mime_type ) return; if ( generate_md5 !in f$mime_type ) return;
FileAnalysis::add_action(f, [$act=FileAnalysis::ACTION_MD5]); FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_MD5]);
} }
event file_new(f: fa_file) &priority=5 event file_new(f: fa_file) &priority=5

View file

@ -13,8 +13,8 @@ export {
function get_file_handle(c: connection, is_orig: bool): string function get_file_handle(c: connection, is_orig: bool): string
{ {
if ( ! c?$smtp ) return ""; if ( ! c?$smtp ) return "";
return fmt("%s %s %s %s", ANALYZER_SMTP, c$start_time, return cat(ANALYZER_SMTP, " ", c$start_time, " ", c$smtp$trans_depth, " ",
c$smtp$trans_depth, c$smtp_state$mime_level); c$smtp_state$mime_level);
} }
module GLOBAL; module GLOBAL;

View file

@ -26,6 +26,6 @@ event file_new(f: fa_file) &priority=5
if ( ! f?$source ) return; if ( ! f?$source ) return;
if ( f$source != "SMTP" ) return; if ( f$source != "SMTP" ) return;
FileAnalysis::add_action(f, [$act=FileAnalysis::ACTION_DATA_EVENT, FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_DATA_EVENT,
$stream_event=intel_mime_data]); $stream_event=intel_mime_data]);
} }

View file

@ -457,8 +457,8 @@ set(bro_SRCS
file_analysis/File.cc file_analysis/File.cc
file_analysis/FileTimer.cc file_analysis/FileTimer.cc
file_analysis/FileID.h file_analysis/FileID.h
file_analysis/Action.h file_analysis/Analyzer.h
file_analysis/ActionSet.cc file_analysis/AnalyzerSet.cc
file_analysis/Extract.cc file_analysis/Extract.cc
file_analysis/Hash.cc file_analysis/Hash.cc
file_analysis/DataEvent.cc file_analysis/DataEvent.cc

View file

@ -7000,17 +7000,49 @@ event event_queue_flush_point%(%);
## .. bro:see:: set_file_handle ## .. bro:see:: set_file_handle
event get_file_handle%(tag: count, c: connection, is_orig: bool%); event get_file_handle%(tag: count, c: connection, is_orig: bool%);
# TODO: document ## Indicates that an analysis of a new file has begun. The analysis can be
## augmented at this time via :bro:see:`FileAnalysis::add_analyzer`.
##
## f: The file.
##
## .. bro:see:: file_over_new_connection file_timeout file_gap file_state_remove
event file_new%(f: fa_file%); event file_new%(f: fa_file%);
# TODO: give the new connection
event file_over_new_connection%(f: fa_file%); ## Indicates that a file has been seen being transferred over a connection
## different from the original.
##
## f: The file.
##
## c: The new connection over which the file is seen being transferred.
##
## .. bro:see:: file_new file_timeout file_gap file_state_remove
event file_over_new_connection%(f: fa_file, c: connection%);
## Indicates that file analysis has timed out because no activity was seen
## for the file in a while.
##
## f: The file.
##
## .. bro:see:: file_new file_over_new_connection file_gap file_state_remove
## default_file_timeout_interval FileAnalysis::postpone_timeout
## FileAnalysis::set_timeout_interval
event file_timeout%(f: fa_file%); event file_timeout%(f: fa_file%);
# TODO: give size of gap
event file_gap%(f: fa_file%); ## Indicates that a chunk of the file is missing.
##
## f: The file.
##
## offset: The byte offset from the start of the file at which the gap begins.
##
## len: The number of missing bytes.
##
## .. bro:see:: file_new file_over_new_connection file_timeout file_state_remove
event file_gap%(f: fa_file, offset: count, len: count%);
## This event is generated each time file analysis is ending for a given file. ## This event is generated each time file analysis is ending for a given file.
## ##
## f: The file. ## f: The file.
## .. bro:see:: file_new file_over_new_connection file_timeout file_gap
event file_state_remove%(f: fa_file%); event file_state_remove%(f: fa_file%);
## This event is generated each time file analysis generates a digest of the ## This event is generated each time file analysis generates a digest of the
@ -7022,8 +7054,8 @@ event file_state_remove%(f: fa_file%);
## ##
## hash: The result of the hashing. ## hash: The result of the hashing.
## ##
## .. bro:see:: FileAnalysis::add_action FileAnalysis::ACTION_MD5 ## .. bro:see:: FileAnalysis::add_analyzer FileAnalysis::ANALYZER_MD5
## FileAnalysis::ACTION_SHA1 FileAnalysis::ACTION_SHA256 ## FileAnalysis::ANALYZER_SHA1 FileAnalysis::ANALYZER_SHA256
event file_hash%(f: fa_file, kind: string, hash: string%); event file_hash%(f: fa_file, kind: string, hash: string%);

View file

@ -14,24 +14,33 @@ function FileAnalysis::__postpone_timeout%(file_id: string%): bool
return new Val(result, TYPE_BOOL); return new Val(result, TYPE_BOOL);
%} %}
## :bro:see:`FileAnalysis::add_action`. ## :bro:see:`FileAnalysis::set_timeout_interval`.
function FileAnalysis::__add_action%(file_id: string, args: any%): bool function FileAnalysis::__set_timeout_interval%(file_id: string, t: interval%): bool
%{ %{
using file_analysis::FileID; using file_analysis::FileID;
using BifType::Record::FileAnalysis::ActionArgs; bool result = file_mgr->SetTimeoutInterval(FileID(file_id->CheckString()),
RecordVal* rv = args->AsRecordVal()->CoerceTo(ActionArgs); t);
bool result = file_mgr->AddAction(FileID(file_id->CheckString()), rv); return new Val(result, TYPE_BOOL);
%}
## :bro:see:`FileAnalysis::add_analyzer`.
function FileAnalysis::__add_analyzer%(file_id: string, args: any%): bool
%{
using file_analysis::FileID;
using BifType::Record::FileAnalysis::AnalyzerArgs;
RecordVal* rv = args->AsRecordVal()->CoerceTo(AnalyzerArgs);
bool result = file_mgr->AddAnalyzer(FileID(file_id->CheckString()), rv);
Unref(rv); Unref(rv);
return new Val(result, TYPE_BOOL); return new Val(result, TYPE_BOOL);
%} %}
## :bro:see:`FileAnalysis::remove_action`. ## :bro:see:`FileAnalysis::remove_analyzer`.
function FileAnalysis::__remove_action%(file_id: string, args: any%): bool function FileAnalysis::__remove_analyzer%(file_id: string, args: any%): bool
%{ %{
using file_analysis::FileID; using file_analysis::FileID;
using BifType::Record::FileAnalysis::ActionArgs; using BifType::Record::FileAnalysis::AnalyzerArgs;
RecordVal* rv = args->AsRecordVal()->CoerceTo(ActionArgs); RecordVal* rv = args->AsRecordVal()->CoerceTo(AnalyzerArgs);
bool result = file_mgr->RemoveAction(FileID(file_id->CheckString()), rv); bool result = file_mgr->RemoveAnalyzer(FileID(file_id->CheckString()), rv);
Unref(rv); Unref(rv);
return new Val(result, TYPE_BOOL); return new Val(result, TYPE_BOOL);
%} %}
@ -82,16 +91,14 @@ function FileAnalysis::__eof%(source: string%): any
module GLOBAL; module GLOBAL;
## For use within a :bro:see:`get_file_handle` handler to return a unique ## For use within a :bro:see:`get_file_handle` handler to set a unique
## identifier to associate with some buffered input to the file analysis ## identifier to associate with the current input to the file analysis
## framework. The buffered data will then immediately be allowed to pass ## framework. Using an empty string for the handle signifies that the
## pass through the file analysis framework and execute any policy hooks ## input will be ignored/discarded.
## that are available. If an empty string is returned, that signifies that
## the buffered data will be discarded with no further action taken on it.
## ##
## handle: A string that uniquely identifies a file. ## handle: A string that uniquely identifies a file.
## ##
## .. bro:see:: get_file_handle FileAnalysis::policy ## .. bro:see:: get_file_handle
function set_file_handle%(handle: string%): any function set_file_handle%(handle: string%): any
%{ %{
file_mgr->SetHandle(handle->CheckString()); file_mgr->SetHandle(handle->CheckString());

View file

@ -1,101 +0,0 @@
#ifndef FILE_ANALYSIS_ACTION_H
#define FILE_ANALYSIS_ACTION_H
#include "Val.h"
#include "NetVar.h"
namespace file_analysis {
/** Enum type identifying the kind of a file analysis action. */
typedef BifEnum::FileAnalysis::Action ActionTag;
class File;
/**
 * Common base for every action that a file_analysis::File object can have
 * attached to it.
 */
class Action {
public:
	/**
	 * Releases the reference to the ActionArgs record that was taken at
	 * construction time.
	 */
	virtual ~Action()
		{
		DBG_LOG(DBG_FILE_ANALYSIS, "Destroy action %d", tag);
		Unref(args);
		}

	/**
	 * Hook for receiving file data non-sequentially.  The default
	 * implementation ignores the data.
	 * @return true if the action is still in a valid state to continue
	 *         receiving data/events or false if it's essentially "done".
	 */
	virtual bool DeliverChunk(const u_char* data, uint64 len, uint64 offset)
		{
		return true;
		}

	/**
	 * Hook for receiving file data sequentially.  The default
	 * implementation ignores the data.
	 * @return true if the action is still in a valid state to continue
	 *         receiving data/events or false if it's essentially "done".
	 */
	virtual bool DeliverStream(const u_char* data, uint64 len)
		{
		return true;
		}

	/**
	 * Hook for an EOF signal, which means no more data is going to be
	 * incoming and the action may be deleted/cleaned up soon.
	 * @return true if the action is still in a valid state to continue
	 *         receiving data/events or false if it's essentially "done".
	 */
	virtual bool EndOfFile()
		{
		return true;
		}

	/**
	 * Hook for missing data in a file stream.
	 * @return true if the action is still in a valid state to continue
	 *         receiving data/events or false if it's essentially "done".
	 */
	virtual bool Undelivered(uint64 offset, uint64 len)
		{
		return true;
		}

	/**
	 * @return the action type enum value.
	 */
	ActionTag Tag() const
		{
		return tag;
		}

	/**
	 * @return the ActionArgs associated with the action.
	 */
	RecordVal* Args() const
		{
		return args;
		}

	/**
	 * @return the file_analysis::File object to which the action is attached.
	 */
	File* GetFile() const
		{
		return file;
		}

	/**
	 * @return the action tag equivalent of the 'act' field from the
	 *         ActionArgs value \a args.
	 */
	static ActionTag ArgsTag(const RecordVal* args)
		{
		using BifType::Record::FileAnalysis::ActionArgs;
		const int act_offset = ActionArgs->FieldOffset("act");
		return static_cast<ActionTag>(args->Lookup(act_offset)->AsEnum());
		}

protected:
	/**
	 * Only subclasses construct actions.  Takes a reference of its own on
	 * \a arg_args, which the destructor releases.
	 */
	Action(RecordVal* arg_args, File* arg_file)
		: tag(Action::ArgsTag(arg_args)),
		  args(arg_args->Ref()->AsRecordVal()),
		  file(arg_file)
		{
		}

	ActionTag tag;    /**< Action type, read from the 'act' field of args. */
	RecordVal* args;  /**< ActionArgs record; one reference is held. */
	File* file;       /**< File the action is attached to. */
};
/**
 * Signature of a factory function that creates an action from an ActionArgs
 * record for the given file.
 */
typedef Action* (*ActionInstantiator)(RecordVal* args, File* file);
} // namespace file_analysis
#endif

View file

@ -1,189 +0,0 @@
#include "ActionSet.h"
#include "File.h"
#include "Action.h"
#include "Extract.h"
#include "DataEvent.h"
#include "Hash.h"
#include "analyzers/PE.h"
using namespace file_analysis;
// Factory functions, one per action type.  ActionSet::InstantiateAction
// indexes this table directly with the tag taken from the ActionArgs, so the
// entry order matters:
// keep in order w/ declared enum values in file_analysis.bif
static ActionInstantiator action_factory[] = {
file_analysis::Extract::Instantiate,
file_analysis::MD5::Instantiate,
file_analysis::SHA1::Instantiate,
file_analysis::SHA256::Instantiate,
file_analysis::DataEvent::Instantiate,
file_analysis::PE_Analyzer::Instantiate,
};
static void action_del_func(void* v)
{
delete (Action*) v;
}
// Registers the delete callback for stored actions and builds the hasher
// that turns ActionArgs records into map keys.
ActionSet::ActionSet(File* arg_file) : file(arg_file)
	{
	action_map.SetDeleteFunc(action_del_func);

	TypeList* key_types = new TypeList();
	key_types->Append(BifType::Record::FileAnalysis::ActionArgs->Ref());
	action_hash = new CompositeHash(key_types);
	Unref(key_types);
	}
// Aborts every modification still waiting in the queue, then frees the hasher.
ActionSet::~ActionSet()
	{
	for ( ; ! mod_queue.empty(); mod_queue.pop() )
		{
		Modification* pending = mod_queue.front();
		// Never applied: let the modification release what it owns.
		pending->Abort();
		delete pending;
		}

	delete action_hash;
	}
// Attaches an action immediately.  Returns true when the action was attached
// or one with the same arguments already exists, false when it could not be
// instantiated.
bool ActionSet::AddAction(RecordVal* args)
	{
	HashKey* key = GetKey(args);

	if ( ! action_map.Lookup(key) )
		{
		Action* created = InstantiateAction(args);

		if ( created )
			{
			// InsertAction() frees the key.
			InsertAction(created, key);
			return true;
			}

		delete key;
		return false;
		}

	DBG_LOG(DBG_FILE_ANALYSIS, "Instantiate action %d skipped for file id"
	        " %s: already exists", Action::ArgsTag(args),
	        file->GetID().c_str());
	delete key;
	return true;
	}
// Instantiates the action now but defers attaching it until the modification
// queue is drained.  Returns false if instantiation failed.
bool ActionSet::QueueAddAction(RecordVal* args)
	{
	HashKey* key = GetKey(args);
	Action* created = InstantiateAction(args);

	if ( created )
		{
		// The queued modification takes over both the action and the key.
		mod_queue.push(new Add(created, key));
		return true;
		}

	delete key;
	return false;
	}
// Applies a queued addition.  If an action with the same key got attached in
// the meantime, the queued action and key are discarded instead.
bool ActionSet::Add::Perform(ActionSet* set)
	{
	if ( ! set->action_map.Lookup(key) )
		{
		set->InsertAction(act, key);
		return true;
		}

	DBG_LOG(DBG_FILE_ANALYSIS, "Add action %d skipped for file id"
	        " %s: already exists", act->Tag(),
	        act->GetFile()->GetID().c_str());
	Abort();
	return true;
	}
bool ActionSet::RemoveAction(const RecordVal* args)
{
return RemoveAction(Action::ArgsTag(args), GetKey(args));
}
// Detaches and destroys the action stored under 'key'; always frees 'key'.
// Returns false if no such action was attached.
bool ActionSet::RemoveAction(ActionTag tag, HashKey* key)
	{
	Action* removed = static_cast<Action*>(action_map.Remove(key));
	delete key;

	if ( removed )
		{
		DBG_LOG(DBG_FILE_ANALYSIS, "Remove action %d for file id %s",
		        removed->Tag(), file->GetID().c_str());
		delete removed;
		return true;
		}

	DBG_LOG(DBG_FILE_ANALYSIS, "Skip remove action %d for file id %s",
	        tag, file->GetID().c_str());
	return false;
	}
bool ActionSet::QueueRemoveAction(const RecordVal* args)
{
HashKey* key = GetKey(args);
ActionTag tag = Action::ArgsTag(args);
mod_queue.push(new Remove(tag, key));
return action_map.Lookup(key);
}
// Applies a queued removal; RemoveAction() frees the key.
bool ActionSet::Remove::Perform(ActionSet* set)
	{
	const bool was_removed = set->RemoveAction(tag, key);
	return was_removed;
	}
// Hashes an ActionArgs record into a newly allocated map key.  A failed hash
// is reported as an internal error.
HashKey* ActionSet::GetKey(const RecordVal* args) const
	{
	HashKey* key = action_hash->ComputeHash(args, 1);

	if ( key )
		return key;

	reporter->InternalError("ActionArgs type mismatch");
	return key;
	}
// Creates the action selected by the tag in 'args' via the factory table.
// Returns a null pointer if the tag has no factory entry or the factory
// itself fails.
Action* ActionSet::InstantiateAction(RecordVal* args) const
	{
	const int idx = static_cast<int>(Action::ArgsTag(args));
	const int num_factories =
	    static_cast<int>(sizeof(action_factory) / sizeof(action_factory[0]));

	// Guard the table lookup: an enum value without a matching factory entry
	// would otherwise read past the end of the array.
	if ( idx < 0 || idx >= num_factories )
		{
		DBG_LOG(DBG_FILE_ANALYSIS, "Instantiate action %d failed for file id"
		        " %s: no factory registered", idx, file->GetID().c_str());
		return 0;
		}

	Action* act = action_factory[idx](args, file);

	if ( ! act )
		{
		// The two literals must be adjacent (no comma) so they form a single
		// format string; otherwise " %s" is passed as the first vararg.
		DBG_LOG(DBG_FILE_ANALYSIS, "Instantiate action %d failed for file id"
		        " %s", idx, file->GetID().c_str());
		return 0;
		}

	return act;
	}
// Stores 'act' in the action map under 'key' and then frees 'key'.
void ActionSet::InsertAction(Action* act, HashKey* key)
	{
	DBG_LOG(DBG_FILE_ANALYSIS, "Add action %d for file id %s",
	        act->Tag(), file->GetID().c_str());

	action_map.Insert(key, act);

	// The key is freed here; callers must not reuse it.
	delete key;
	}
void ActionSet::DrainModifications()
{
if ( mod_queue.empty() ) return;
DBG_LOG(DBG_FILE_ANALYSIS, "Start flushing action mod queue of file id %s",
file->GetID().c_str());
do
{
Modification* mod = mod_queue.front();
mod->Perform(this);
delete mod;
mod_queue.pop();
} while ( ! mod_queue.empty() );
DBG_LOG(DBG_FILE_ANALYSIS, "End flushing action mod queue of file id %s",
file->GetID().c_str());
}

View file

@ -1,109 +0,0 @@
#ifndef FILE_ANALYSIS_ACTIONSET_H
#define FILE_ANALYSIS_ACTIONSET_H
#include <queue>
#include "Action.h"
#include "Dict.h"
#include "CompHash.h"
#include "Val.h"
namespace file_analysis {
class File;
declare(PDict,Action);
/**
 * A set of file analysis actions indexed by ActionArgs.  Allows queueing
 * of additions/removals so that those modifications can happen at
 * well-defined times (e.g. to make sure a loop iterator isn't invalidated).
 */
class ActionSet {
public:
	ActionSet(File* arg_file);

	~ActionSet();

	/**
	 * Attaches an action right away.
	 * @param args an ActionArgs record selecting and configuring the action.
	 * @return true if the action was instantiated/attached or an action with
	 *         the same arguments is already attached, else false.
	 */
	bool AddAction(RecordVal* args);

	/**
	 * Instantiates an action and queues its attachment until
	 * DrainModifications() is called.
	 * @return true if the action was able to be instantiated, else false.
	 */
	bool QueueAddAction(RecordVal* args);

	/**
	 * Removes an action right away.
	 * @return false if the action didn't exist and so wasn't removed,
	 *         else true.
	 */
	bool RemoveAction(const RecordVal* args);

	/**
	 * Queues the removal of an action until DrainModifications() is called.
	 * @return true if the action exists at the time of the call, else false.
	 */
	bool QueueRemoveAction(const RecordVal* args);

	/**
	 * Performs all queued modifications to the currently active actions.
	 */
	void DrainModifications();

	/** Starts an iteration over the attached actions. */
	IterCookie* InitForIteration() const
		{ return action_map.InitForIteration(); }

	/** @return the next attached action of the iteration \a c. */
	Action* NextEntry(IterCookie* c)
		{ return action_map.NextEntry(c); }

protected:
	/** @return a newly allocated hash key for \a args. */
	HashKey* GetKey(const RecordVal* args) const;

	/** @return a new action for \a args, or a null pointer on failure. */
	Action* InstantiateAction(RecordVal* args) const;

	/** Stores \a act under \a key and frees \a key. */
	void InsertAction(Action* act, HashKey* key);

	/** Removes the action stored under \a key and frees \a key. */
	bool RemoveAction(ActionTag tag, HashKey* key);

	File* file;
	CompositeHash* action_hash;	/**< ActionArgs hashes Action map lookup. */
	PDict(Action) action_map;	/**< Actions indexed by ActionArgs. */

	/** A deferred change to the set of attached actions. */
	class Modification {
	public:
		virtual ~Modification() {}

		/** Applies the change to \a set. */
		virtual bool Perform(ActionSet* set) = 0;

		/** Discards the change, freeing whatever it still owns. */
		virtual void Abort() = 0;
	};

	/** Deferred attachment of an already instantiated action. */
	class Add : public Modification {
	public:
		Add(Action* arg_act, HashKey* arg_key)
			: Modification(), act(arg_act), key(arg_key) {}
		virtual ~Add() {}
		virtual bool Perform(ActionSet* set);
		virtual void Abort() { delete act; delete key; }

	protected:
		Action* act;
		HashKey* key;
	};

	/** Deferred removal of the action stored under a key. */
	class Remove : public Modification {
	public:
		Remove(ActionTag arg_tag, HashKey* arg_key)
			: Modification(), tag(arg_tag), key(arg_key) {}
		virtual ~Remove() {}
		virtual bool Perform(ActionSet* set);
		virtual void Abort() { delete key; }

	protected:
		ActionTag tag;
		HashKey* key;
	};

	// Fully qualified so this header does not depend on a using-directive
	// being in effect wherever it is included.
	typedef std::queue<Modification*> ModQueue;
	ModQueue mod_queue;
};
} // namespace file_analysis
#endif

View file

@ -0,0 +1,103 @@
#ifndef FILE_ANALYSIS_ANALYZER_H
#define FILE_ANALYSIS_ANALYZER_H
#include "Val.h"
#include "NetVar.h"
namespace file_analysis {
/** Enum type identifying the kind of a file analyzer. */
typedef BifEnum::FileAnalysis::Analyzer FA_Tag;
class File;
/**
 * Common base for every analyzer that a file_analysis::File object can have
 * attached to it.
 */
class Analyzer {
public:
	/**
	 * Releases the reference to the AnalyzerArgs record that was taken at
	 * construction time.
	 */
	virtual ~Analyzer()
		{
		DBG_LOG(DBG_FILE_ANALYSIS, "Destroy file analyzer %d", tag);
		Unref(args);
		}

	/**
	 * Hook for receiving file data non-sequentially.  The default
	 * implementation ignores the data.
	 * @return true if the analyzer is still in a valid state to continue
	 *         receiving data/events or false if it's essentially "done".
	 */
	virtual bool DeliverChunk(const u_char* data, uint64 len, uint64 offset)
		{
		return true;
		}

	/**
	 * Hook for receiving file data sequentially.  The default
	 * implementation ignores the data.
	 * @return true if the analyzer is still in a valid state to continue
	 *         receiving data/events or false if it's essentially "done".
	 */
	virtual bool DeliverStream(const u_char* data, uint64 len)
		{
		return true;
		}

	/**
	 * Hook for an EOF signal, which means no more data is going to be
	 * incoming and the analyzer may be deleted/cleaned up soon.
	 * @return true if the analyzer is still in a valid state to continue
	 *         receiving data/events or false if it's essentially "done".
	 */
	virtual bool EndOfFile()
		{
		return true;
		}

	/**
	 * Hook for missing data in a file stream.
	 * @return true if the analyzer is still in a valid state to continue
	 *         receiving data/events or false if it's essentially "done".
	 */
	virtual bool Undelivered(uint64 offset, uint64 len)
		{
		return true;
		}

	/**
	 * @return the analyzer type enum value.
	 */
	FA_Tag Tag() const
		{
		return tag;
		}

	/**
	 * @return the AnalyzerArgs associated with the analyzer.
	 */
	RecordVal* Args() const
		{
		return args;
		}

	/**
	 * @return the file_analysis::File object to which the analyzer is attached.
	 */
	File* GetFile() const
		{
		return file;
		}

	/**
	 * @return the analyzer tag equivalent of the 'tag' field from the
	 *         AnalyzerArgs value \a args.
	 */
	static FA_Tag ArgsTag(const RecordVal* args)
		{
		using BifType::Record::FileAnalysis::AnalyzerArgs;
		const int tag_offset = AnalyzerArgs->FieldOffset("tag");
		return static_cast<FA_Tag>(args->Lookup(tag_offset)->AsEnum());
		}

protected:
	/**
	 * Only subclasses construct analyzers.  Takes a reference of its own on
	 * \a arg_args, which the destructor releases.
	 */
	Analyzer(RecordVal* arg_args, File* arg_file)
		: tag(file_analysis::Analyzer::ArgsTag(arg_args)),
		  args(arg_args->Ref()->AsRecordVal()),
		  file(arg_file)
		{
		}

	FA_Tag tag;       /**< Analyzer type, read from the 'tag' field of args. */
	RecordVal* args;  /**< AnalyzerArgs record; one reference is held. */
	File* file;       /**< File the analyzer is attached to. */
};
/**
 * Signature of a factory function that creates an analyzer from an
 * AnalyzerArgs record for the given file.
 */
typedef file_analysis::Analyzer* (*AnalyzerInstantiator)(RecordVal* args,
File* file);
} // namespace file_analysis
#endif

View file

@ -0,0 +1,188 @@
#include "AnalyzerSet.h"
#include "File.h"
#include "Analyzer.h"
#include "Extract.h"
#include "DataEvent.h"
#include "Hash.h"
using namespace file_analysis;
// Factory functions, one per analyzer type.  AnalyzerSet::InstantiateAnalyzer
// indexes this table directly with the tag taken from the AnalyzerArgs, so
// the entry order matters:
// keep in order w/ declared enum values in file_analysis.bif
static AnalyzerInstantiator analyzer_factory[] = {
file_analysis::Extract::Instantiate,
file_analysis::MD5::Instantiate,
file_analysis::SHA1::Instantiate,
file_analysis::SHA256::Instantiate,
file_analysis::DataEvent::Instantiate,
};
static void analyzer_del_func(void* v)
{
delete (file_analysis::Analyzer*) v;
}
// Registers the delete callback for stored analyzers and builds the hasher
// that turns AnalyzerArgs records into map keys.
AnalyzerSet::AnalyzerSet(File* arg_file) : file(arg_file)
	{
	analyzer_map.SetDeleteFunc(analyzer_del_func);

	TypeList* key_types = new TypeList();
	key_types->Append(BifType::Record::FileAnalysis::AnalyzerArgs->Ref());
	analyzer_hash = new CompositeHash(key_types);
	Unref(key_types);
	}
// Aborts every modification still waiting in the queue, then frees the hasher.
AnalyzerSet::~AnalyzerSet()
	{
	for ( ; ! mod_queue.empty(); mod_queue.pop() )
		{
		Modification* pending = mod_queue.front();
		// Never applied: let the modification release what it owns.
		pending->Abort();
		delete pending;
		}

	delete analyzer_hash;
	}
// Attaches an analyzer immediately.  Returns true when the analyzer was
// attached or one with the same arguments already exists, false when it
// could not be instantiated.
bool AnalyzerSet::Add(RecordVal* args)
	{
	HashKey* key = GetKey(args);

	if ( ! analyzer_map.Lookup(key) )
		{
		file_analysis::Analyzer* created = InstantiateAnalyzer(args);

		if ( created )
			{
			// Insert() frees the key.
			Insert(created, key);
			return true;
			}

		delete key;
		return false;
		}

	DBG_LOG(DBG_FILE_ANALYSIS, "Instantiate analyzer %d skipped for file id"
	        " %s: already exists", file_analysis::Analyzer::ArgsTag(args),
	        file->GetID().c_str());
	delete key;
	return true;
	}
// Instantiates the analyzer now but defers attaching it until the
// modification queue is drained.  Returns false if instantiation failed.
bool AnalyzerSet::QueueAdd(RecordVal* args)
	{
	HashKey* key = GetKey(args);
	file_analysis::Analyzer* created = InstantiateAnalyzer(args);

	if ( created )
		{
		// The queued modification takes over both the analyzer and the key.
		mod_queue.push(new AddMod(created, key));
		return true;
		}

	delete key;
	return false;
	}
// Applies a queued addition.  If an analyzer with the same key got attached
// in the meantime, the queued analyzer and key are discarded instead.
bool AnalyzerSet::AddMod::Perform(AnalyzerSet* set)
	{
	if ( ! set->analyzer_map.Lookup(key) )
		{
		set->Insert(a, key);
		return true;
		}

	DBG_LOG(DBG_FILE_ANALYSIS, "Add analyzer %d skipped for file id"
	        " %s: already exists", a->Tag(), a->GetFile()->GetID().c_str());
	Abort();
	return true;
	}
bool AnalyzerSet::Remove(const RecordVal* args)
{
return Remove(file_analysis::Analyzer::ArgsTag(args), GetKey(args));
}
// Detaches and destroys the analyzer stored under 'key'; always frees 'key'.
// Returns false if no such analyzer was attached.
bool AnalyzerSet::Remove(FA_Tag tag, HashKey* key)
	{
	file_analysis::Analyzer* removed =
	    static_cast<file_analysis::Analyzer*>(analyzer_map.Remove(key));
	delete key;

	if ( removed )
		{
		DBG_LOG(DBG_FILE_ANALYSIS, "Remove analyzer %d for file id %s",
		        removed->Tag(), file->GetID().c_str());
		delete removed;
		return true;
		}

	DBG_LOG(DBG_FILE_ANALYSIS, "Skip remove analyzer %d for file id %s",
	        tag, file->GetID().c_str());
	return false;
	}
bool AnalyzerSet::QueueRemove(const RecordVal* args)
{
HashKey* key = GetKey(args);
FA_Tag tag = file_analysis::Analyzer::ArgsTag(args);
mod_queue.push(new RemoveMod(tag, key));
return analyzer_map.Lookup(key);
}
// Applies a queued removal; Remove() frees the key.
bool AnalyzerSet::RemoveMod::Perform(AnalyzerSet* set)
	{
	const bool was_removed = set->Remove(tag, key);
	return was_removed;
	}
// Hashes an AnalyzerArgs record into a newly allocated map key.  A failed
// hash is reported as an internal error.
HashKey* AnalyzerSet::GetKey(const RecordVal* args) const
	{
	HashKey* key = analyzer_hash->ComputeHash(args, 1);

	if ( key )
		return key;

	reporter->InternalError("AnalyzerArgs type mismatch");
	return key;
	}
// Creates the analyzer selected by the tag in 'args' via the factory table.
// Returns a null pointer if the tag has no factory entry or the factory
// itself fails.
file_analysis::Analyzer* AnalyzerSet::InstantiateAnalyzer(RecordVal* args) const
	{
	const int idx = static_cast<int>(file_analysis::Analyzer::ArgsTag(args));
	const int num_factories = static_cast<int>(
	    sizeof(analyzer_factory) / sizeof(analyzer_factory[0]));

	// Guard the table lookup: an enum value without a matching factory entry
	// would otherwise read past the end of the array.
	if ( idx < 0 || idx >= num_factories )
		{
		DBG_LOG(DBG_FILE_ANALYSIS, "Instantiate analyzer %d failed for file id"
		        " %s: no factory registered", idx, file->GetID().c_str());
		return 0;
		}

	file_analysis::Analyzer* a = analyzer_factory[idx](args, file);

	if ( ! a )
		{
		// The two literals must be adjacent (no comma) so they form a single
		// format string; otherwise " %s" is passed as the first vararg.
		DBG_LOG(DBG_FILE_ANALYSIS, "Instantiate analyzer %d failed for file id"
		        " %s", idx, file->GetID().c_str());
		return 0;
		}

	return a;
	}
// Stores 'a' in the analyzer map under 'key' and then frees 'key'.
void AnalyzerSet::Insert(file_analysis::Analyzer* a, HashKey* key)
	{
	DBG_LOG(DBG_FILE_ANALYSIS, "Add analyzer %d for file id %s",
	        a->Tag(), file->GetID().c_str());

	analyzer_map.Insert(key, a);

	// The key is freed here; callers must not reuse it.
	delete key;
	}
void AnalyzerSet::DrainModifications()
{
if ( mod_queue.empty() ) return;
DBG_LOG(DBG_FILE_ANALYSIS, "Start analyzer mod queue flush of file id %s",
file->GetID().c_str());
do
{
Modification* mod = mod_queue.front();
mod->Perform(this);
delete mod;
mod_queue.pop();
} while ( ! mod_queue.empty() );
DBG_LOG(DBG_FILE_ANALYSIS, "End flushing analyzer mod queue of file id %s",
file->GetID().c_str());
}

View file

@ -0,0 +1,109 @@
#ifndef FILE_ANALYSIS_ANALYZERSET_H
#define FILE_ANALYSIS_ANALYZERSET_H
#include <queue>
#include "Analyzer.h"
#include "Dict.h"
#include "CompHash.h"
#include "Val.h"
namespace file_analysis {
class File;
declare(PDict,Analyzer);
/**
 * A set of file analysis analyzers indexed by AnalyzerArgs.  Allows queueing
 * of additions/removals so that those modifications can happen at
 * well-defined times (e.g. to make sure a loop iterator isn't invalidated).
 */
class AnalyzerSet {
public:
	AnalyzerSet(File* arg_file);

	~AnalyzerSet();

	/**
	 * Attaches an analyzer right away.
	 * @param args an AnalyzerArgs record selecting and configuring the
	 *        analyzer.
	 * @return true if the analyzer was instantiated/attached or an analyzer
	 *         with the same arguments is already attached, else false.
	 */
	bool Add(RecordVal* args);

	/**
	 * Instantiates an analyzer and queues its attachment until
	 * DrainModifications() is called.
	 * @return true if the analyzer was able to be instantiated, else false.
	 */
	bool QueueAdd(RecordVal* args);

	/**
	 * Removes an analyzer right away.
	 * @return false if the analyzer didn't exist and so wasn't removed,
	 *         else true.
	 */
	bool Remove(const RecordVal* args);

	/**
	 * Queues the removal of an analyzer until DrainModifications() is called.
	 * @return true if the analyzer exists at the time of the call, else false.
	 */
	bool QueueRemove(const RecordVal* args);

	/**
	 * Performs all queued modifications to the currently active analyzers.
	 */
	void DrainModifications();

	/** Starts an iteration over the attached analyzers. */
	IterCookie* InitForIteration() const
		{ return analyzer_map.InitForIteration(); }

	/** @return the next attached analyzer of the iteration \a c. */
	file_analysis::Analyzer* NextEntry(IterCookie* c)
		{ return analyzer_map.NextEntry(c); }

protected:
	/** @return a newly allocated hash key for \a args. */
	HashKey* GetKey(const RecordVal* args) const;

	/** @return a new analyzer for \a args, or a null pointer on failure. */
	file_analysis::Analyzer* InstantiateAnalyzer(RecordVal* args) const;

	/** Stores \a a under \a key and frees \a key. */
	void Insert(file_analysis::Analyzer* a, HashKey* key);

	/** Removes the analyzer stored under \a key and frees \a key. */
	bool Remove(FA_Tag tag, HashKey* key);

	File* file;
	CompositeHash* analyzer_hash;	/**< AnalyzerArgs hashes. */
	PDict(file_analysis::Analyzer) analyzer_map;	/**< Indexed by AnalyzerArgs. */

	/** A deferred change to the set of attached analyzers. */
	class Modification {
	public:
		virtual ~Modification() {}

		/** Applies the change to \a set. */
		virtual bool Perform(AnalyzerSet* set) = 0;

		/** Discards the change, freeing whatever it still owns. */
		virtual void Abort() = 0;
	};

	/** Deferred attachment of an already instantiated analyzer. */
	class AddMod : public Modification {
	public:
		AddMod(file_analysis::Analyzer* arg_a, HashKey* arg_key)
			: Modification(), a(arg_a), key(arg_key) {}
		virtual ~AddMod() {}
		virtual bool Perform(AnalyzerSet* set);
		virtual void Abort() { delete a; delete key; }

	protected:
		file_analysis::Analyzer* a;
		HashKey* key;
	};

	/** Deferred removal of the analyzer stored under a key. */
	class RemoveMod : public Modification {
	public:
		RemoveMod(FA_Tag arg_tag, HashKey* arg_key)
			: Modification(), tag(arg_tag), key(arg_key) {}
		virtual ~RemoveMod() {}
		virtual bool Perform(AnalyzerSet* set);
		virtual void Abort() { delete key; }

	protected:
		FA_Tag tag;
		HashKey* key;
	};

	// Fully qualified so this header does not depend on a using-directive
	// being in effect wherever it is included.
	typedef std::queue<Modification*> ModQueue;
	ModQueue mod_queue;
};
} // namespace file_analysis
#endif

View file

@ -9,18 +9,18 @@ using namespace file_analysis;
DataEvent::DataEvent(RecordVal* args, File* file, DataEvent::DataEvent(RecordVal* args, File* file,
EventHandlerPtr ce, EventHandlerPtr se) EventHandlerPtr ce, EventHandlerPtr se)
: Action(args, file), chunk_event(ce), stream_event(se) : file_analysis::Analyzer(args, file), chunk_event(ce), stream_event(se)
{ {
} }
Action* DataEvent::Instantiate(RecordVal* args, File* file) file_analysis::Analyzer* DataEvent::Instantiate(RecordVal* args, File* file)
{ {
using BifType::Record::FileAnalysis::ActionArgs; using BifType::Record::FileAnalysis::AnalyzerArgs;
const char* chunk_field = "chunk_event"; const char* chunk_field = "chunk_event";
const char* stream_field = "stream_event"; const char* stream_field = "stream_event";
int chunk_off = ActionArgs->FieldOffset(chunk_field); int chunk_off = AnalyzerArgs->FieldOffset(chunk_field);
int stream_off = ActionArgs->FieldOffset(stream_field); int stream_off = AnalyzerArgs->FieldOffset(stream_field);
Val* chunk_val = args->Lookup(chunk_off); Val* chunk_val = args->Lookup(chunk_off);
Val* stream_val = args->Lookup(stream_off); Val* stream_val = args->Lookup(stream_off);

View file

@ -5,17 +5,17 @@
#include "Val.h" #include "Val.h"
#include "File.h" #include "File.h"
#include "Action.h" #include "Analyzer.h"
namespace file_analysis { namespace file_analysis {
/** /**
* An action to send file data to script-layer events. * An analyzer to send file data to script-layer events.
*/ */
class DataEvent : public Action { class DataEvent : public file_analysis::Analyzer {
public: public:
static Action* Instantiate(RecordVal* args, File* file); static file_analysis::Analyzer* Instantiate(RecordVal* args, File* file);
virtual bool DeliverChunk(const u_char* data, uint64 len, uint64 offset); virtual bool DeliverChunk(const u_char* data, uint64 len, uint64 offset);

View file

@ -6,7 +6,7 @@
using namespace file_analysis; using namespace file_analysis;
Extract::Extract(RecordVal* args, File* file, const string& arg_filename) Extract::Extract(RecordVal* args, File* file, const string& arg_filename)
: Action(args, file), filename(arg_filename) : file_analysis::Analyzer(args, file), filename(arg_filename)
{ {
fd = open(filename.c_str(), O_WRONLY | O_CREAT | O_TRUNC, 0666); fd = open(filename.c_str(), O_WRONLY | O_CREAT | O_TRUNC, 0666);
@ -25,11 +25,11 @@ Extract::~Extract()
safe_close(fd); safe_close(fd);
} }
Action* Extract::Instantiate(RecordVal* args, File* file) file_analysis::Analyzer* Extract::Instantiate(RecordVal* args, File* file)
{ {
using BifType::Record::FileAnalysis::ActionArgs; using BifType::Record::FileAnalysis::AnalyzerArgs;
const char* field = "extract_filename"; const char* field = "extract_filename";
Val* v = args->Lookup(ActionArgs->FieldOffset(field)); Val* v = args->Lookup(AnalyzerArgs->FieldOffset(field));
if ( ! v ) return 0; if ( ! v ) return 0;

View file

@ -5,17 +5,17 @@
#include "Val.h" #include "Val.h"
#include "File.h" #include "File.h"
#include "Action.h" #include "Analyzer.h"
namespace file_analysis { namespace file_analysis {
/** /**
* An action to simply extract files to disk. * An analyzer to extract files to disk.
*/ */
class Extract : public Action { class Extract : public file_analysis::Analyzer {
public: public:
static Action* Instantiate(RecordVal* args, File* file); static file_analysis::Analyzer* Instantiate(RecordVal* args, File* file);
virtual ~Extract(); virtual ~Extract();

View file

@ -4,11 +4,12 @@
#include "File.h" #include "File.h"
#include "FileTimer.h" #include "FileTimer.h"
#include "FileID.h" #include "FileID.h"
#include "Analyzer.h"
#include "Manager.h" #include "Manager.h"
#include "Reporter.h" #include "Reporter.h"
#include "Val.h" #include "Val.h"
#include "Type.h" #include "Type.h"
#include "Analyzer.h" #include "../Analyzer.h"
#include "Event.h" #include "Event.h"
using namespace file_analysis; using namespace file_analysis;
@ -36,6 +37,7 @@ static RecordVal* get_conn_id_val(const Connection* conn)
int File::id_idx = -1; int File::id_idx = -1;
int File::parent_id_idx = -1; int File::parent_id_idx = -1;
int File::source_idx = -1; int File::source_idx = -1;
int File::is_orig_idx = -1;
int File::conns_idx = -1; int File::conns_idx = -1;
int File::last_active_idx = -1; int File::last_active_idx = -1;
int File::seen_bytes_idx = -1; int File::seen_bytes_idx = -1;
@ -45,10 +47,8 @@ int File::overflow_bytes_idx = -1;
int File::timeout_interval_idx = -1; int File::timeout_interval_idx = -1;
int File::bof_buffer_size_idx = -1; int File::bof_buffer_size_idx = -1;
int File::bof_buffer_idx = -1; int File::bof_buffer_idx = -1;
int File::file_type_idx = -1;
int File::mime_type_idx = -1; int File::mime_type_idx = -1;
magic_t File::magic = 0;
magic_t File::magic_mime = 0; magic_t File::magic_mime = 0;
string File::salt; string File::salt;
@ -60,6 +60,7 @@ void File::StaticInit()
id_idx = Idx("id"); id_idx = Idx("id");
parent_id_idx = Idx("parent_id"); parent_id_idx = Idx("parent_id");
source_idx = Idx("source"); source_idx = Idx("source");
is_orig_idx = Idx("is_orig");
conns_idx = Idx("conns"); conns_idx = Idx("conns");
last_active_idx = Idx("last_active"); last_active_idx = Idx("last_active");
seen_bytes_idx = Idx("seen_bytes"); seen_bytes_idx = Idx("seen_bytes");
@ -69,19 +70,18 @@ void File::StaticInit()
timeout_interval_idx = Idx("timeout_interval"); timeout_interval_idx = Idx("timeout_interval");
bof_buffer_size_idx = Idx("bof_buffer_size"); bof_buffer_size_idx = Idx("bof_buffer_size");
bof_buffer_idx = Idx("bof_buffer"); bof_buffer_idx = Idx("bof_buffer");
file_type_idx = Idx("file_type");
mime_type_idx = Idx("mime_type"); mime_type_idx = Idx("mime_type");
bro_init_magic(&magic, MAGIC_NONE);
bro_init_magic(&magic_mime, MAGIC_MIME); bro_init_magic(&magic_mime, MAGIC_MIME);
salt = BifConst::FileAnalysis::salt->CheckString(); salt = BifConst::FileAnalysis::salt->CheckString();
} }
File::File(const string& unique, Connection* conn, AnalyzerTag::Tag tag) File::File(const string& unique, Connection* conn, AnalyzerTag::Tag tag,
bool is_orig)
: id(""), unique(unique), val(0), postpone_timeout(false), : id(""), unique(unique), val(0), postpone_timeout(false),
first_chunk(true), missed_bof(false), need_reassembly(false), done(false), first_chunk(true), missed_bof(false), need_reassembly(false), done(false),
actions(this) analyzers(this)
{ {
StaticInit(); StaticInit();
@ -101,8 +101,9 @@ File::File(const string& unique, Connection* conn, AnalyzerTag::Tag tag)
if ( conn ) if ( conn )
{ {
// add source and connection fields // add source, connection, is_orig fields
val->Assign(source_idx, new StringVal(Analyzer::GetTagName(tag))); val->Assign(source_idx, new StringVal(::Analyzer::GetTagName(tag)));
val->Assign(is_orig_idx, new Val(is_orig, TYPE_BOOL));
UpdateConnectionFields(conn); UpdateConnectionFields(conn);
} }
else else
@ -145,9 +146,16 @@ void File::UpdateConnectionFields(Connection* conn)
Val* idx = get_conn_id_val(conn); Val* idx = get_conn_id_val(conn);
if ( ! conns->AsTableVal()->Lookup(idx) ) if ( ! conns->AsTableVal()->Lookup(idx) )
{ {
conns->AsTableVal()->Assign(idx, conn->BuildConnVal()); Val* conn_val = conn->BuildConnVal();
if ( ! is_first ) conns->AsTableVal()->Assign(idx, conn_val);
file_mgr->FileEvent(file_over_new_connection, this);
if ( ! is_first && FileEventAvailable(file_over_new_connection) )
{
val_list* vl = new val_list();
vl->append(val->Ref());
vl->append(conn_val->Ref());
FileEvent(file_over_new_connection, vl);
}
} }
Unref(idx); Unref(idx);
@ -182,6 +190,11 @@ double File::GetTimeoutInterval() const
return LookupFieldDefaultInterval(timeout_interval_idx); return LookupFieldDefaultInterval(timeout_interval_idx);
} }
// Overwrites the "timeout_interval" field of this file's record value (val)
// with a newly allocated interval-typed Val holding \a interval.
void File::SetTimeoutInterval(double interval)
{
val->Assign(timeout_interval_idx, new Val(interval, TYPE_INTERVAL));
}
void File::IncrementByteCount(uint64 size, int field_idx) void File::IncrementByteCount(uint64 size, int field_idx)
{ {
uint64 old = LookupFieldDefaultCount(field_idx); uint64 old = LookupFieldDefaultCount(field_idx);
@ -207,14 +220,14 @@ void File::ScheduleInactivityTimer() const
timer_mgr->Add(new FileTimer(network_time, id, GetTimeoutInterval())); timer_mgr->Add(new FileTimer(network_time, id, GetTimeoutInterval()));
} }
bool File::AddAction(RecordVal* args) bool File::AddAnalyzer(RecordVal* args)
{ {
return done ? false : actions.QueueAddAction(args); return done ? false : analyzers.QueueAdd(args);
} }
bool File::RemoveAction(const RecordVal* args) bool File::RemoveAnalyzer(const RecordVal* args)
{ {
return done ? false : actions.QueueRemoveAction(args); return done ? false : analyzers.QueueRemove(args);
} }
bool File::BufferBOF(const u_char* data, uint64 len) bool File::BufferBOF(const u_char* data, uint64 len)
@ -235,18 +248,22 @@ bool File::BufferBOF(const u_char* data, uint64 len)
return true; return true;
} }
bool File::DetectTypes(const u_char* data, uint64 len) bool File::DetectMIME(const u_char* data, uint64 len)
{ {
const char* desc = bro_magic_buffer(magic, data, len);
const char* mime = bro_magic_buffer(magic_mime, data, len); const char* mime = bro_magic_buffer(magic_mime, data, len);
if ( desc )
val->Assign(file_type_idx, new StringVal(desc));
if ( mime ) if ( mime )
val->Assign(mime_type_idx, new StringVal(mime)); {
const char* mime_end = strchr(mime, ';');
return desc || mime; if ( mime_end )
// strip off charset
val->Assign(mime_type_idx, new StringVal(mime_end - mime, mime));
else
val->Assign(mime_type_idx, new StringVal(mime));
}
return mime;
} }
void File::ReplayBOF() void File::ReplayBOF()
@ -264,10 +281,9 @@ void File::ReplayBOF()
BroString* bs = concatenate(bof_buffer.chunks); BroString* bs = concatenate(bof_buffer.chunks);
val->Assign(bof_buffer_idx, new StringVal(bs)); val->Assign(bof_buffer_idx, new StringVal(bs));
DetectTypes(bs->Bytes(), bs->Len()); DetectMIME(bs->Bytes(), bs->Len());
file_mgr->FileEvent(file_new, this); FileEvent(file_new);
mgr.Drain(); // need immediate feedback about actions to add
for ( size_t i = 0; i < bof_buffer.chunks.size(); ++i ) for ( size_t i = 0; i < bof_buffer.chunks.size(); ++i )
DataIn(bof_buffer.chunks[i]->Bytes(), bof_buffer.chunks[i]->Len()); DataIn(bof_buffer.chunks[i]->Bytes(), bof_buffer.chunks[i]->Len());
@ -275,28 +291,26 @@ void File::ReplayBOF()
void File::DataIn(const u_char* data, uint64 len, uint64 offset) void File::DataIn(const u_char* data, uint64 len, uint64 offset)
{ {
actions.DrainModifications(); analyzers.DrainModifications();
if ( first_chunk ) if ( first_chunk )
{ {
// TODO: this should all really be delayed until we attempt reassembly // TODO: this should all really be delayed until we attempt reassembly
DetectTypes(data, len); DetectMIME(data, len);
file_mgr->FileEvent(file_new, this); FileEvent(file_new);
mgr.Drain(); // need immediate feedback about actions to add
actions.DrainModifications();
first_chunk = false; first_chunk = false;
} }
Action* act = 0; file_analysis::Analyzer* a = 0;
IterCookie* c = actions.InitForIteration(); IterCookie* c = analyzers.InitForIteration();
while ( (act = actions.NextEntry(c)) ) while ( (a = analyzers.NextEntry(c)) )
{ {
if ( ! act->DeliverChunk(data, len, offset) ) if ( ! a->DeliverChunk(data, len, offset) )
actions.QueueRemoveAction(act->Args()); analyzers.QueueRemove(a->Args());
} }
actions.DrainModifications(); analyzers.DrainModifications();
// TODO: check reassembly requirement based on buffer size in record // TODO: check reassembly requirement based on buffer size in record
if ( need_reassembly ) if ( need_reassembly )
@ -311,38 +325,36 @@ void File::DataIn(const u_char* data, uint64 len, uint64 offset)
void File::DataIn(const u_char* data, uint64 len) void File::DataIn(const u_char* data, uint64 len)
{ {
actions.DrainModifications(); analyzers.DrainModifications();
if ( BufferBOF(data, len) ) return; if ( BufferBOF(data, len) ) return;
if ( missed_bof ) if ( missed_bof )
{ {
DetectTypes(data, len); DetectMIME(data, len);
file_mgr->FileEvent(file_new, this); FileEvent(file_new);
mgr.Drain(); // need immediate feedback about actions to add
actions.DrainModifications();
missed_bof = false; missed_bof = false;
} }
Action* act = 0; file_analysis::Analyzer* a = 0;
IterCookie* c = actions.InitForIteration(); IterCookie* c = analyzers.InitForIteration();
while ( (act = actions.NextEntry(c)) ) while ( (a = analyzers.NextEntry(c)) )
{ {
if ( ! act->DeliverStream(data, len) ) if ( ! a->DeliverStream(data, len) )
{ {
actions.QueueRemoveAction(act->Args()); analyzers.QueueRemove(a->Args());
continue; continue;
} }
uint64 offset = LookupFieldDefaultCount(seen_bytes_idx) + uint64 offset = LookupFieldDefaultCount(seen_bytes_idx) +
LookupFieldDefaultCount(missing_bytes_idx); LookupFieldDefaultCount(missing_bytes_idx);
if ( ! act->DeliverChunk(data, len, offset) ) if ( ! a->DeliverChunk(data, len, offset) )
actions.QueueRemoveAction(act->Args()); analyzers.QueueRemove(a->Args());
} }
actions.DrainModifications(); analyzers.DrainModifications();
IncrementByteCount(len, seen_bytes_idx); IncrementByteCount(len, seen_bytes_idx);
} }
@ -350,46 +362,79 @@ void File::EndOfFile()
{ {
if ( done ) return; if ( done ) return;
actions.DrainModifications(); analyzers.DrainModifications();
// Send along anything that's been buffered, but never flushed. // Send along anything that's been buffered, but never flushed.
ReplayBOF(); ReplayBOF();
done = true; done = true;
Action* act = 0; file_analysis::Analyzer* a = 0;
IterCookie* c = actions.InitForIteration(); IterCookie* c = analyzers.InitForIteration();
while ( (act = actions.NextEntry(c)) ) while ( (a = analyzers.NextEntry(c)) )
{ {
if ( ! act->EndOfFile() ) if ( ! a->EndOfFile() )
actions.QueueRemoveAction(act->Args()); analyzers.QueueRemove(a->Args());
} }
file_mgr->FileEvent(file_state_remove, this); FileEvent(file_state_remove);
actions.DrainModifications(); analyzers.DrainModifications();
} }
void File::Gap(uint64 offset, uint64 len) void File::Gap(uint64 offset, uint64 len)
{ {
actions.DrainModifications(); analyzers.DrainModifications();
// If we were buffering the beginning of the file, a gap means we've got // If we were buffering the beginning of the file, a gap means we've got
// as much contiguous stuff at the beginning as possible, so work with that. // as much contiguous stuff at the beginning as possible, so work with that.
ReplayBOF(); ReplayBOF();
Action* act = 0; file_analysis::Analyzer* a = 0;
IterCookie* c = actions.InitForIteration(); IterCookie* c = analyzers.InitForIteration();
while ( (act = actions.NextEntry(c)) ) while ( (a = analyzers.NextEntry(c)) )
{ {
if ( ! act->Undelivered(offset, len) ) if ( ! a->Undelivered(offset, len) )
actions.QueueRemoveAction(act->Args()); analyzers.QueueRemove(a->Args());
} }
file_mgr->FileEvent(file_gap, this); if ( FileEventAvailable(file_gap) )
{
val_list* vl = new val_list();
vl->append(val->Ref());
vl->append(new Val(offset, TYPE_COUNT));
vl->append(new Val(len, TYPE_COUNT));
FileEvent(file_gap, vl);
}
actions.DrainModifications(); analyzers.DrainModifications();
IncrementByteCount(len, missing_bytes_idx); IncrementByteCount(len, missing_bytes_idx);
} }
// Reports whether an event may be raised for this file: the handler pointer
// \a h must evaluate to true and the file manager must not be ignoring this
// file's unique identifier string.
bool File::FileEventAvailable(EventHandlerPtr h)
{
return h && ! file_mgr->IsIgnored(unique);
}
// Raises event \a h with this file's record value as its only argument.
// Does nothing when FileEventAvailable(h) is false.
void File::FileEvent(EventHandlerPtr h)
{
if ( ! FileEventAvailable(h) ) return;
val_list* vl = new val_list();
// The argument list takes its own reference to the record value.
vl->append(val->Ref());
FileEvent(h, vl);
}
// Queues event \a h with the argument list \a vl. This overload performs no
// availability check of its own; callers are expected to have tested
// FileEventAvailable(h) before building \a vl.
void File::FileEvent(EventHandlerPtr h, val_list* vl)
{
mgr.QueueEvent(h, vl);
if ( h == file_new || h == file_timeout )
{
// immediate feedback is required for these events.
// Drain the event queue right away, then drain the analyzer set's
// queued modifications.
mgr.Drain();
analyzers.DrainModifications();
}
}

View file

@ -8,7 +8,7 @@
#include "AnalyzerTags.h" #include "AnalyzerTags.h"
#include "Conn.h" #include "Conn.h"
#include "Val.h" #include "Val.h"
#include "ActionSet.h" #include "AnalyzerSet.h"
#include "FileID.h" #include "FileID.h"
#include "BroString.h" #include "BroString.h"
@ -34,6 +34,11 @@ public:
*/ */
double GetTimeoutInterval() const; double GetTimeoutInterval() const;
/**
* Set the "timeout_interval" field from #val record to \a interval seconds.
*/
void SetTimeoutInterval(double interval);
/** /**
* @return value of the "id" field from #val record. * @return value of the "id" field from #val record.
*/ */
@ -74,45 +79,61 @@ public:
void ScheduleInactivityTimer() const; void ScheduleInactivityTimer() const;
/** /**
* Queues attaching an action. Only one action per type can be attached at * Queues attaching an analyzer. Only one analyzer per type can be attached
* a time unless the arguments differ. * at a time unless the arguments differ.
* @return false if action can't be instantiated, else true. * @return false if analyzer can't be instantiated, else true.
*/ */
bool AddAction(RecordVal* args); bool AddAnalyzer(RecordVal* args);
/** /**
* Queues removal of an action. * Queues removal of an analyzer.
* @return true if action was active at time of call, else false. * @return true if analyzer was active at time of call, else false.
*/ */
bool RemoveAction(const RecordVal* args); bool RemoveAnalyzer(const RecordVal* args);
/** /**
* Pass in non-sequential data and deliver to attached actions/analyzers. * Pass in non-sequential data and deliver to attached analyzers.
*/ */
void DataIn(const u_char* data, uint64 len, uint64 offset); void DataIn(const u_char* data, uint64 len, uint64 offset);
/** /**
* Pass in sequential data and deliver to attached actions/analyzers. * Pass in sequential data and deliver to attached analyzers.
*/ */
void DataIn(const u_char* data, uint64 len); void DataIn(const u_char* data, uint64 len);
/** /**
* Inform attached actions/analyzers about end of file being seen. * Inform attached analyzers about end of file being seen.
*/ */
void EndOfFile(); void EndOfFile();
/** /**
* Inform attached actions/analyzers about a gap in file stream. * Inform attached analyzers about a gap in file stream.
*/ */
void Gap(uint64 offset, uint64 len); void Gap(uint64 offset, uint64 len);
/**
* @return true if event has a handler and the file isn't ignored.
*/
bool FileEventAvailable(EventHandlerPtr h);
/**
* Raises an event related to the file's life-cycle; the only parameter
* to that event is the \c fa_file record.
*/
void FileEvent(EventHandlerPtr h);
/**
* Raises an event related to the file's life-cycle.
*/
void FileEvent(EventHandlerPtr h, val_list* vl);
protected: protected:
/** /**
* Constructor; only file_analysis::Manager should be creating these. * Constructor; only file_analysis::Manager should be creating these.
*/ */
File(const string& unique, Connection* conn = 0, File(const string& unique, Connection* conn = 0,
AnalyzerTag::Tag tag = AnalyzerTag::Error); AnalyzerTag::Tag tag = AnalyzerTag::Error, bool is_orig = false);
/** /**
* Updates the "conn_ids" and "conn_uids" fields in #val record with the * Updates the "conn_ids" and "conn_uids" fields in #val record with the
@ -149,11 +170,11 @@ protected:
void ReplayBOF(); void ReplayBOF();
/** /**
* Does file/mime type detection and assigns types (if available) to * Does mime type detection and assigns type (if available) to \c mime_type
* corresponding fields in #val. * field in #val.
* @return whether a file or mime type was available. * @return whether mime type was available.
*/ */
bool DetectTypes(const u_char* data, uint64 len); bool DetectMIME(const u_char* data, uint64 len);
FileID id; /**< A pretty hash that likely identifies file */ FileID id; /**< A pretty hash that likely identifies file */
string unique; /**< A string that uniquely identifies file */ string unique; /**< A string that uniquely identifies file */
@ -163,7 +184,7 @@ protected:
bool missed_bof; /**< Flags that we missed start of file. */ bool missed_bof; /**< Flags that we missed start of file. */
bool need_reassembly; /**< Whether file stream reassembly is needed. */ bool need_reassembly; /**< Whether file stream reassembly is needed. */
bool done; /**< If this object is about to be deleted. */ bool done; /**< If this object is about to be deleted. */
ActionSet actions; AnalyzerSet analyzers;
struct BOF_Buffer { struct BOF_Buffer {
BOF_Buffer() : full(false), replayed(false), size(0) {} BOF_Buffer() : full(false), replayed(false), size(0) {}
@ -186,7 +207,6 @@ protected:
*/ */
static void StaticInit(); static void StaticInit();
static magic_t magic;
static magic_t magic_mime; static magic_t magic_mime;
static string salt; static string salt;
@ -194,6 +214,7 @@ protected:
static int id_idx; static int id_idx;
static int parent_id_idx; static int parent_id_idx;
static int source_idx; static int source_idx;
static int is_orig_idx;
static int conns_idx; static int conns_idx;
static int last_active_idx; static int last_active_idx;
static int seen_bytes_idx; static int seen_bytes_idx;
@ -203,7 +224,6 @@ protected:
static int timeout_interval_idx; static int timeout_interval_idx;
static int bof_buffer_size_idx; static int bof_buffer_size_idx;
static int bof_buffer_idx; static int bof_buffer_idx;
static int file_type_idx;
static int mime_type_idx; static int mime_type_idx;
}; };

View file

@ -7,7 +7,7 @@
using namespace file_analysis; using namespace file_analysis;
Hash::Hash(RecordVal* args, File* file, HashVal* hv, const char* arg_kind) Hash::Hash(RecordVal* args, File* file, HashVal* hv, const char* arg_kind)
: Action(args, file), hash(hv), fed(false), kind(arg_kind) : file_analysis::Analyzer(args, file), hash(hv), fed(false), kind(arg_kind)
{ {
hash->Init(); hash->Init();
} }

View file

@ -6,14 +6,14 @@
#include "Val.h" #include "Val.h"
#include "OpaqueVal.h" #include "OpaqueVal.h"
#include "File.h" #include "File.h"
#include "Action.h" #include "Analyzer.h"
namespace file_analysis { namespace file_analysis {
/** /**
* An action to produce a hash of file contents. * An analyzer to produce a hash of file contents.
*/ */
class Hash : public Action { class Hash : public file_analysis::Analyzer {
public: public:
virtual ~Hash(); virtual ~Hash();
@ -38,7 +38,7 @@ protected:
class MD5 : public Hash { class MD5 : public Hash {
public: public:
static Action* Instantiate(RecordVal* args, File* file) static file_analysis::Analyzer* Instantiate(RecordVal* args, File* file)
{ return file_hash ? new MD5(args, file) : 0; } { return file_hash ? new MD5(args, file) : 0; }
protected: protected:
@ -51,7 +51,7 @@ protected:
class SHA1 : public Hash { class SHA1 : public Hash {
public: public:
static Action* Instantiate(RecordVal* args, File* file) static file_analysis::Analyzer* Instantiate(RecordVal* args, File* file)
{ return file_hash ? new SHA1(args, file) : 0; } { return file_hash ? new SHA1(args, file) : 0; }
protected: protected:
@ -64,7 +64,7 @@ protected:
class SHA256 : public Hash { class SHA256 : public Hash {
public: public:
static Action* Instantiate(RecordVal* args, File* file) static file_analysis::Analyzer* Instantiate(RecordVal* args, File* file)
{ return file_hash ? new SHA256(args, file) : 0; } { return file_hash ? new SHA256(args, file) : 0; }
protected: protected:

View file

@ -3,7 +3,7 @@
#include "Manager.h" #include "Manager.h"
#include "File.h" #include "File.h"
#include "Action.h" #include "Analyzer.h"
#include "Var.h" #include "Var.h"
#include "Event.h" #include "Event.h"
@ -40,7 +40,7 @@ void Manager::DataIn(const u_char* data, uint64 len, uint64 offset,
if ( IsDisabled(tag) ) return; if ( IsDisabled(tag) ) return;
GetFileHandle(tag, conn, is_orig); GetFileHandle(tag, conn, is_orig);
DataIn(data, len, offset, GetFile(current_handle, conn, tag)); DataIn(data, len, offset, GetFile(current_handle, conn, tag, is_orig));
} }
void Manager::DataIn(const u_char* data, uint64 len, uint64 offset, void Manager::DataIn(const u_char* data, uint64 len, uint64 offset,
@ -65,7 +65,9 @@ void Manager::DataIn(const u_char* data, uint64 len, AnalyzerTag::Tag tag,
{ {
if ( IsDisabled(tag) ) return; if ( IsDisabled(tag) ) return;
GetFileHandle(tag, conn, is_orig); GetFileHandle(tag, conn, is_orig);
DataIn(data, len, GetFile(current_handle, conn, tag)); // Sequential data input shouldn't be going over multiple conns, so don't
// do the check to update connection set.
DataIn(data, len, GetFile(current_handle, conn, tag, is_orig, false));
} }
void Manager::DataIn(const u_char* data, uint64 len, const string& unique) void Manager::DataIn(const u_char* data, uint64 len, const string& unique)
@ -108,7 +110,7 @@ void Manager::Gap(uint64 offset, uint64 len, AnalyzerTag::Tag tag,
if ( IsDisabled(tag) ) return; if ( IsDisabled(tag) ) return;
GetFileHandle(tag, conn, is_orig); GetFileHandle(tag, conn, is_orig);
Gap(offset, len, GetFile(current_handle, conn, tag)); Gap(offset, len, GetFile(current_handle, conn, tag, is_orig));
} }
void Manager::Gap(uint64 offset, uint64 len, const string& unique) void Manager::Gap(uint64 offset, uint64 len, const string& unique)
@ -129,7 +131,7 @@ void Manager::SetSize(uint64 size, AnalyzerTag::Tag tag, Connection* conn,
if ( IsDisabled(tag) ) return; if ( IsDisabled(tag) ) return;
GetFileHandle(tag, conn, is_orig); GetFileHandle(tag, conn, is_orig);
SetSize(size, GetFile(current_handle, conn, tag)); SetSize(size, GetFile(current_handle, conn, tag, is_orig));
} }
void Manager::SetSize(uint64 size, const string& unique) void Manager::SetSize(uint64 size, const string& unique)
@ -147,17 +149,6 @@ void Manager::SetSize(uint64 size, File* file)
RemoveFile(file->GetUnique()); RemoveFile(file->GetUnique());
} }
void Manager::FileEvent(EventHandlerPtr h, File* file)
{
if ( ! h ) return;
if ( IsIgnored(file->GetUnique()) ) return;
val_list * vl = new val_list();
vl->append(file->GetVal()->Ref());
mgr.QueueEvent(h, vl);
}
bool Manager::PostponeTimeout(const FileID& file_id) const bool Manager::PostponeTimeout(const FileID& file_id) const
{ {
File* file = Lookup(file_id); File* file = Lookup(file_id);
@ -168,26 +159,36 @@ bool Manager::PostponeTimeout(const FileID& file_id) const
return true; return true;
} }
bool Manager::AddAction(const FileID& file_id, RecordVal* args) const bool Manager::SetTimeoutInterval(const FileID& file_id, double interval) const
{ {
File* file = Lookup(file_id); File* file = Lookup(file_id);
if ( ! file ) return false; if ( ! file ) return false;
return file->AddAction(args); file->SetTimeoutInterval(interval);
return true;
} }
bool Manager::RemoveAction(const FileID& file_id, const RecordVal* args) const bool Manager::AddAnalyzer(const FileID& file_id, RecordVal* args) const
{ {
File* file = Lookup(file_id); File* file = Lookup(file_id);
if ( ! file ) return false; if ( ! file ) return false;
return file->RemoveAction(args); return file->AddAnalyzer(args);
}
bool Manager::RemoveAnalyzer(const FileID& file_id, const RecordVal* args) const
{
File* file = Lookup(file_id);
if ( ! file ) return false;
return file->RemoveAnalyzer(args);
} }
File* Manager::GetFile(const string& unique, Connection* conn, File* Manager::GetFile(const string& unique, Connection* conn,
AnalyzerTag::Tag tag) AnalyzerTag::Tag tag, bool is_orig, bool update_conn)
{ {
if ( unique.empty() ) return 0; if ( unique.empty() ) return 0;
if ( IsIgnored(unique) ) return 0; if ( IsIgnored(unique) ) return 0;
@ -196,7 +197,7 @@ File* Manager::GetFile(const string& unique, Connection* conn,
if ( ! rval ) if ( ! rval )
{ {
rval = str_map[unique] = new File(unique, conn, tag); rval = str_map[unique] = new File(unique, conn, tag, is_orig);
FileID id = rval->GetID(); FileID id = rval->GetID();
if ( id_map[id] ) if ( id_map[id] )
@ -212,7 +213,8 @@ File* Manager::GetFile(const string& unique, Connection* conn,
else else
{ {
rval->UpdateLastActivityTime(); rval->UpdateLastActivityTime();
rval->UpdateConnectionFields(conn); if ( update_conn )
rval->UpdateConnectionFields(conn);
} }
return rval; return rval;
@ -235,8 +237,7 @@ void Manager::Timeout(const FileID& file_id, bool is_terminating)
file->postpone_timeout = false; file->postpone_timeout = false;
FileEvent(file_timeout, file); file->FileEvent(file_timeout);
mgr.Drain(); // need immediate feedback about whether to postpone
if ( file->postpone_timeout && ! is_terminating ) if ( file->postpone_timeout && ! is_terminating )
{ {

View file

@ -97,23 +97,28 @@ public:
bool PostponeTimeout(const FileID& file_id) const; bool PostponeTimeout(const FileID& file_id) const;
/** /**
* Queue attachment of an action to the file identifier. Multiple actions * Sets an inactivity threshold for the file.
* of a given type can be attached per file identifier at a time as long as
* the arguments differ.
* @return false if the action failed to be instantiated, else true.
*/ */
bool AddAction(const FileID& file_id, RecordVal* args) const; bool SetTimeoutInterval(const FileID& file_id, double interval) const;
/** /**
* Queue removal of an action for a given file identifier. * Queue attachment of an analyzer to the file identifier. Multiple
* @return true if the action is active at the time of call, else false. * analyzers of a given type can be attached per file identifier at a time
* as long as the arguments differ.
* @return false if the analyzer failed to be instantiated, else true.
*/ */
bool RemoveAction(const FileID& file_id, const RecordVal* args) const; bool AddAnalyzer(const FileID& file_id, RecordVal* args) const;
/** /**
* Queues an event related to the file's life-cycle. * Queue removal of an analyzer for a given file identifier.
* @return true if the analyzer is active at the time of call, else false.
*/ */
void FileEvent(EventHandlerPtr h, File* file); bool RemoveAnalyzer(const FileID& file_id, const RecordVal* args) const;
/**
* @return whether the file mapped to \a unique is being ignored.
*/
bool IsIgnored(const string& unique);
protected: protected:
@ -129,7 +134,8 @@ protected:
* fields. * fields.
*/ */
File* GetFile(const string& unique, Connection* conn = 0, File* GetFile(const string& unique, Connection* conn = 0,
AnalyzerTag::Tag tag = AnalyzerTag::Error); AnalyzerTag::Tag tag = AnalyzerTag::Error,
bool is_orig = false, bool update_conn = true);
/** /**
* @return the File object mapped to \a file_id, or a null pointer if no * @return the File object mapped to \a file_id, or a null pointer if no
@ -149,11 +155,6 @@ protected:
*/ */
bool RemoveFile(const string& unique); bool RemoveFile(const string& unique);
/**
* @return whether the file mapped to \a unique is being ignored.
*/
bool IsIgnored(const string& unique);
/** /**
* Sets #current_handle to a unique file handle string based on what the * Sets #current_handle to a unique file handle string based on what the
* \c get_file_handle event derives from the connection params. The * \c get_file_handle event derives from the connection params. The

View file

@ -206,6 +206,7 @@ void usage()
fprintf(stderr, " --use-binpac | use new-style BinPAC parsers when available\n"); fprintf(stderr, " --use-binpac | use new-style BinPAC parsers when available\n");
fprintf(stderr, " $BROPATH | file search path (%s)\n", bro_path()); fprintf(stderr, " $BROPATH | file search path (%s)\n", bro_path());
fprintf(stderr, " $BROMAGIC | libmagic mime magic database search path (%s)\n", bro_magic_path());
fprintf(stderr, " $BRO_PREFIXES | prefix list (%s)\n", bro_prefixes()); fprintf(stderr, " $BRO_PREFIXES | prefix list (%s)\n", bro_prefixes());
fprintf(stderr, " $BRO_DNS_FAKE | disable DNS lookups (%s)\n", bro_dns_fake()); fprintf(stderr, " $BRO_DNS_FAKE | disable DNS lookups (%s)\n", bro_dns_fake());
fprintf(stderr, " $BRO_SEED_FILE | file to load seeds from (not set)\n"); fprintf(stderr, " $BRO_SEED_FILE | file to load seeds from (not set)\n");

View file

@ -231,28 +231,25 @@ type gtp_gsn_addr: record;
module FileAnalysis; module FileAnalysis;
type ActionArgs: record; type AnalyzerArgs: record;
## An enumeration of various file analysis actions that can be taken. ## An enumeration of various file analysis actions that can be taken.
enum Action %{ enum Analyzer %{
## Extract a file to local filesystem ## Extract a file to local filesystem
ACTION_EXTRACT, ANALYZER_EXTRACT,
## Calculate an MD5 digest of the file's contents. ## Calculate an MD5 digest of the file's contents.
ACTION_MD5, ANALYZER_MD5,
## Calculate an SHA1 digest of the file's contents. ## Calculate an SHA1 digest of the file's contents.
ACTION_SHA1, ANALYZER_SHA1,
## Calculate an SHA256 digest of the file's contents. ## Calculate an SHA256 digest of the file's contents.
ACTION_SHA256, ANALYZER_SHA256,
## Deliver the file contents to the script-layer in an event. ## Deliver the file contents to the script-layer in an event.
ACTION_DATA_EVENT, ANALYZER_DATA_EVENT,
## Windows executable analyzer
ACTION_PE_ANALYZER,
%} %}
module GLOBAL; module GLOBAL;

Some files were not shown because too many files have changed in this diff Show more