Merge remote-tracking branch 'origin/master' into topic/seth/sumstats-updates

Conflicts:
	scripts/base/frameworks/sumstats/cluster.bro
	scripts/base/frameworks/sumstats/plugins/average.bro
	scripts/base/frameworks/sumstats/plugins/max.bro
	scripts/base/frameworks/sumstats/plugins/min.bro
	scripts/base/frameworks/sumstats/plugins/sample.bro
	scripts/base/frameworks/sumstats/plugins/std-dev.bro
	scripts/base/frameworks/sumstats/plugins/sum.bro
	scripts/base/frameworks/sumstats/plugins/unique.bro
	scripts/base/frameworks/sumstats/plugins/variance.bro
	scripts/policy/protocols/http/detect-sqli.bro
	testing/btest/scripts/base/frameworks/sumstats/cluster-intermediate-update.bro
This commit is contained in:
Seth Hall 2013-05-21 22:33:16 -04:00
commit 0a18b62d12
349 changed files with 237121 additions and 160465 deletions

114
CHANGES
View file

@ -1,4 +1,118 @@
2.1-656 | 2013-05-17 15:58:07 -0700
* Fix mutex lock problem for writers. (Bernhard Amann)
2.1-654 | 2013-05-17 13:49:52 -0700
* Tweaks to sqlite3 configuration to address threading issues.
(Bernhard Amann)
2.1-651 | 2013-05-17 13:37:16 -0700
* Fix uninitialized DPM member. (Jon Siwek)
* Fix issue with transaction ID reuse in a single DNS connection. (Seth Hall)
* New function added to the queue.bro script to support peeking at
the new gettable item in the queue without removing it. (Seth Hall)
2.1-647 | 2013-05-17 07:47:14 -0700
* Fixing Broxygen generation to have BROMAGIC set. (Robin Sommer)
* Fix for 'fchmod undeclared here' on FreeBSD. (Robin Sommer)
* CMake policy fix to avoid errors with older versions. (Robin
Sommer)
2.1-641 | 2013-05-15 18:15:09 -0700
* Test update. (Robin Sommer)
2.1-640 | 2013-05-15 17:24:09 -0700
* Support for cleaning up threads that have terminated. (Bernhard
Amann and Robin Sommer). Includes:
- Both logging and input frameworks now clean up threads once
they aren't further needed anymnore.
- New function Log::remove_stream() that removes a logging
stream, stopping all writer threads that are associated with
it. Note, however, that removing a *filter* from a stream
still doesn't clean up any threads. The problem is that
because of the output paths potentially being created
dynamically it's unclear if the writer thread will still be
needed in the future.
2.1-626 | 2013-05-15 16:09:31 -0700
* Add "reservoir" sampler for SumStats framework. This maintains
a set of N uniquely distributed random samples. (Bernhard Amann)
2.1-619 | 2013-05-15 16:01:42 -0700
* SQLite reader and writer combo. This allows to read/write
persistent data from on disk SQLite databases. The current
interface is quite low-level, we'll add higher-level abstractions
in the future. (Bernhard Amann)
2.1-576 | 2013-05-15 14:29:09 -0700
* Initial version of new file analysis framework. This moves most of
the processing of file content from script-land into the core,
where it belongs. Much of this is an internal change, and at this
point the new code has essentially feature-equality with the old
one. More script-level changes to come. (Jon Siwek)
2.1-502 | 2013-05-10 19:29:37 -0700
* Allow default function/hook/event parameters. Addresses #972. (Jon
Siwek)
* Change the endianness parameter of bytestring_to_count() BIF to
default to false (big endian). (Jon Siwek)
2.1-500 | 2013-05-10 19:22:24 -0700
* Fix to prevent merge-hook of SumStat's unique plugin from damaging
source data. (Bernhard Amann)
2.1-498 | 2013-05-03 17:44:08 -0700
* Table lookups return copy of non-const &default vals. This
prevents unintentional modifications to the &default value itself.
Addresses #981. (Jon Siwek)
2.1-496 | 2013-05-03 15:54:47 -0700
* Fix memory leak and unnecessary allocations in OpaqueVal.
Addresses #986. (Matthias Vallentin)
2.1-492 | 2013-05-02 12:46:26 -0700
* Work-around for sumstats framework not propagating updates after
intermediate check in cluster environments. (Bernhard Amann)
* Always apply tcp_connection_attempt. Before this change it was
only applied when a connection_attempt() event handler was
defined. (Robin Sommer)
* Fixing coverage.bare-mode-errors test. (Robin Sommer)
2.1-487 | 2013-05-01 18:03:22 -0700
* Always apply tcp_connection_attempt timer, even if no
connection_attempt() event handler is defined. (Robin Sommer)
2.1-486 | 2013-05-01 15:28:45 -0700
* New framework for computing summary statistics in
base/framework/sumstats. This replaces the metrics frameworks, and
comes with a number of applications build on top, see NEWS. More
documentation to follow. (Seth Hall)
2.1-397 | 2013-04-29 21:19:00 -0700
* Fixing memory leaks in CompHash implementation. Addresses #987.

View file

@ -17,12 +17,17 @@ set(BRO_SCRIPT_SOURCE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/scripts)
get_filename_component(BRO_SCRIPT_INSTALL_PATH ${BRO_SCRIPT_INSTALL_PATH}
ABSOLUTE)
set(BRO_MAGIC_INSTALL_PATH ${BRO_ROOT_DIR}/share/bro/magic)
set(BRO_MAGIC_SOURCE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/magic)
configure_file(bro-path-dev.in ${CMAKE_CURRENT_BINARY_DIR}/bro-path-dev)
file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/bro-path-dev.sh
"export BROPATH=`${CMAKE_CURRENT_BINARY_DIR}/bro-path-dev`\n"
"export BROMAGIC=\"${BRO_MAGIC_SOURCE_PATH}\"\n"
"export PATH=\"${CMAKE_CURRENT_BINARY_DIR}/src\":$PATH\n")
file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/bro-path-dev.csh
"setenv BROPATH `${CMAKE_CURRENT_BINARY_DIR}/bro-path-dev`\n"
"setenv BROMAGIC \"${BRO_MAGIC_SOURCE_PATH}\"\n"
"setenv PATH \"${CMAKE_CURRENT_BINARY_DIR}/src\":$PATH\n")
file(STRINGS "${CMAKE_CURRENT_SOURCE_DIR}/VERSION" VERSION LIMIT_COUNT 1)
@ -69,6 +74,12 @@ if (MISSING_PREREQS)
message(FATAL_ERROR "Configuration aborted due to missing prerequisites")
endif ()
set(libmagic_req 5.04)
if ( LibMagic_VERSION VERSION_LESS ${libmagic_req} )
message(FATAL_ERROR "libmagic of at least version ${libmagic_req} required "
"(found ${LibMagic_VERSION})")
endif ()
include_directories(BEFORE
${PCAP_INCLUDE_DIR}
${OpenSSL_INCLUDE_DIR}
@ -190,6 +201,11 @@ CheckOptionalBuildSources(aux/broctl Broctl INSTALL_BROCTL)
CheckOptionalBuildSources(aux/bro-aux Bro-Aux INSTALL_AUX_TOOLS)
CheckOptionalBuildSources(aux/broccoli Broccoli INSTALL_BROCCOLI)
install(DIRECTORY ./magic/ DESTINATION ${BRO_MAGIC_INSTALL_PATH} FILES_MATCHING
PATTERN "COPYING" EXCLUDE
PATTERN "*"
)
########################################################################
## Packaging Setup

34
NEWS
View file

@ -51,6 +51,40 @@ New Functionality
can take up to two indices for the start and end index of the
substring to return (e.g. "mystring[1,3]").
- Functions now support default parameters, e.g.:
global foo: function(s: string, t: string &default="abc", u: count &default=0);
- The new file analysis framework moves most of the processing of file
content from script-land into the core, where it belongs. Much of
this is an internal change, the framework comes with the following
user-visibible functionality (some of that was already available
before, but done differently):
[TODO: This will probably change with further script updates.]
- A binary input reader interfaces the input framework with file
analysis, allowing to inject files on disk into Bro's
processing.
- Supports for analyzing data transfereed via HTTP range
requests.
- HTTP:
* Identify MIME type of message.
* Extract message to disk.
* Compute MD5 for messages.
- SMTP:
* Identify MIME type of message.
* Extract message to disk.
* Compute MD5 for messages.
* Provide access to start of entity data.
- FTP data transfers: Identify MIME type; record to disk.
- IRC DCC transfers: Record to disk.
Changed Functionality
~~~~~~~~~~~~~~~~~~~~~

View file

@ -1 +1 @@
2.1-397
2.1-656

@ -1 +1 @@
Subproject commit a4b8dd0b691c3f614537ad8471fc80a82ce7b2df
Subproject commit f86a3169b8d49189d264cbc1a7507260cd9ff51d

@ -1 +1 @@
Subproject commit 70681007546aad6e5648494e882b71adb9165105
Subproject commit cfaf4eea788bdac4ebfe9e46e3de2cd74b0bc068

@ -1 +1 @@
Subproject commit e64204fec55759c614a276c1933bbff2069a63db
Subproject commit 8955807b0f4151f5f6aca2e68d353b9b341d9f86

@ -1 +1 @@
Subproject commit 786b83664c6a15faeb153d118310526b7790deae
Subproject commit c25a64b173652e934bcd7b88e8573b306bf59ac5

2
cmake

@ -1 +1 @@
Subproject commit 94e72a3075bb0b9550ad05758963afda394bfb2c
Subproject commit e1a7fd00a0a66d6831a239fe84f5fcfaa54e2c35

View file

@ -107,7 +107,7 @@ macro(REST_TARGET srcDir broInput)
COMMAND "${CMAKE_COMMAND}"
ARGS -E remove_directory .state
# generate the reST documentation using bro
COMMAND BROPATH=${BROPATH}:${srcDir} ${CMAKE_BINARY_DIR}/src/bro
COMMAND BROPATH=${BROPATH}:${srcDir} BROMAGIC=${CMAKE_SOURCE_DIR}/magic ${CMAKE_BINARY_DIR}/src/bro
ARGS -b -Z ${broInput} || (rm -rf .state *.log *.rst && exit 1)
# move generated doc into a new directory tree that
# defines the final structure of documents

View file

@ -19,6 +19,7 @@ rest_target(${psd} base/init-bare.bro internal)
rest_target(${CMAKE_BINARY_DIR}/src base/bro.bif.bro)
rest_target(${CMAKE_BINARY_DIR}/src base/const.bif.bro)
rest_target(${CMAKE_BINARY_DIR}/src base/event.bif.bro)
rest_target(${CMAKE_BINARY_DIR}/src base/file_analysis.bif.bro)
rest_target(${CMAKE_BINARY_DIR}/src base/input.bif.bro)
rest_target(${CMAKE_BINARY_DIR}/src base/logging.bif.bro)
rest_target(${CMAKE_BINARY_DIR}/src base/reporter.bif.bro)
@ -32,10 +33,13 @@ rest_target(${psd} base/frameworks/cluster/setup-connections.bro)
rest_target(${psd} base/frameworks/communication/main.bro)
rest_target(${psd} base/frameworks/control/main.bro)
rest_target(${psd} base/frameworks/dpd/main.bro)
rest_target(${psd} base/frameworks/file-analysis/main.bro)
rest_target(${psd} base/frameworks/input/main.bro)
rest_target(${psd} base/frameworks/input/readers/ascii.bro)
rest_target(${psd} base/frameworks/input/readers/benchmark.bro)
rest_target(${psd} base/frameworks/input/readers/binary.bro)
rest_target(${psd} base/frameworks/input/readers/raw.bro)
rest_target(${psd} base/frameworks/input/readers/sqlite.bro)
rest_target(${psd} base/frameworks/intel/cluster.bro)
rest_target(${psd} base/frameworks/intel/input.bro)
rest_target(${psd} base/frameworks/intel/main.bro)
@ -46,6 +50,7 @@ rest_target(${psd} base/frameworks/logging/writers/ascii.bro)
rest_target(${psd} base/frameworks/logging/writers/dataseries.bro)
rest_target(${psd} base/frameworks/logging/writers/elasticsearch.bro)
rest_target(${psd} base/frameworks/logging/writers/none.bro)
rest_target(${psd} base/frameworks/logging/writers/sqlite.bro)
rest_target(${psd} base/frameworks/notice/actions/add-geodata.bro)
rest_target(${psd} base/frameworks/notice/actions/drop.bro)
rest_target(${psd} base/frameworks/notice/actions/email_admin.bro)
@ -65,6 +70,7 @@ rest_target(${psd} base/frameworks/sumstats/cluster.bro)
rest_target(${psd} base/frameworks/sumstats/main.bro)
rest_target(${psd} base/frameworks/sumstats/non-cluster.bro)
rest_target(${psd} base/frameworks/sumstats/plugins/average.bro)
rest_target(${psd} base/frameworks/sumstats/plugins/last.bro)
rest_target(${psd} base/frameworks/sumstats/plugins/max.bro)
rest_target(${psd} base/frameworks/sumstats/plugins/min.bro)
rest_target(${psd} base/frameworks/sumstats/plugins/sample.bro)
@ -80,21 +86,25 @@ rest_target(${psd} base/protocols/conn/main.bro)
rest_target(${psd} base/protocols/conn/polling.bro)
rest_target(${psd} base/protocols/dns/consts.bro)
rest_target(${psd} base/protocols/dns/main.bro)
rest_target(${psd} base/protocols/ftp/file-analysis.bro)
rest_target(${psd} base/protocols/ftp/file-extract.bro)
rest_target(${psd} base/protocols/ftp/gridftp.bro)
rest_target(${psd} base/protocols/ftp/main.bro)
rest_target(${psd} base/protocols/ftp/utils-commands.bro)
rest_target(${psd} base/protocols/http/file-analysis.bro)
rest_target(${psd} base/protocols/http/file-extract.bro)
rest_target(${psd} base/protocols/http/file-hash.bro)
rest_target(${psd} base/protocols/http/file-ident.bro)
rest_target(${psd} base/protocols/http/main.bro)
rest_target(${psd} base/protocols/http/utils.bro)
rest_target(${psd} base/protocols/irc/dcc-send.bro)
rest_target(${psd} base/protocols/irc/file-analysis.bro)
rest_target(${psd} base/protocols/irc/main.bro)
rest_target(${psd} base/protocols/modbus/consts.bro)
rest_target(${psd} base/protocols/modbus/main.bro)
rest_target(${psd} base/protocols/smtp/entities-excerpt.bro)
rest_target(${psd} base/protocols/smtp/entities.bro)
rest_target(${psd} base/protocols/smtp/file-analysis.bro)
rest_target(${psd} base/protocols/smtp/main.bro)
rest_target(${psd} base/protocols/socks/consts.bro)
rest_target(${psd} base/protocols/socks/main.bro)
@ -137,7 +147,13 @@ rest_target(${psd} policy/integration/barnyard2/main.bro)
rest_target(${psd} policy/integration/barnyard2/types.bro)
rest_target(${psd} policy/integration/collective-intel/main.bro)
rest_target(${psd} policy/misc/analysis-groups.bro)
rest_target(${psd} policy/misc/app-metrics.bro)
rest_target(${psd} policy/misc/app-stats/main.bro)
rest_target(${psd} policy/misc/app-stats/plugins/facebook.bro)
rest_target(${psd} policy/misc/app-stats/plugins/gmail.bro)
rest_target(${psd} policy/misc/app-stats/plugins/google.bro)
rest_target(${psd} policy/misc/app-stats/plugins/netflix.bro)
rest_target(${psd} policy/misc/app-stats/plugins/pandora.bro)
rest_target(${psd} policy/misc/app-stats/plugins/youtube.bro)
rest_target(${psd} policy/misc/capture-loss.bro)
rest_target(${psd} policy/misc/detect-traceroute/main.bro)
rest_target(${psd} policy/misc/loaded-scripts.bro)

View file

@ -459,6 +459,31 @@ The Bro scripting language supports the following built-in types.
print greeting("Dave");
Function parameters may specify default values as long as they appear
last in the parameter list:
.. code:: bro
global foo: function(s: string, t: string &default="abc", u: count &default=0);
If a function was previously declared with default parameters, the
default expressions can be omitted when implementing the function
body and they will still be used for function calls that lack those
arguments.
.. code:: bro
function foo(s: string, t: string, u: count)
{
print s, t, u;
}
And calls to the function may omit the defaults from the argument list:
.. code:: bro
foo("test");
.. bro:type:: event
Event handlers are nearly identical in both syntax and semantics to
@ -597,10 +622,10 @@ scripting language supports the following built-in attributes.
.. bro:attr:: &default
Uses a default value for a record field or container elements. For
example, ``table[int] of string &default="foo" }`` would create a
table that returns the :bro:type:`string` ``"foo"`` for any
non-existing index.
Uses a default value for a record field, a function/hook/event
parameter, or container elements. For example, ``table[int] of
string &default="foo" }`` would create a table that returns the
:bro:type:`string` ``"foo"`` for any non-existing index.
.. bro:attr:: &redef

29
magic/COPYING Normal file
View file

@ -0,0 +1,29 @@
# $File: LEGAL.NOTICE,v 1.15 2006/05/03 18:48:33 christos Exp $
# Copyright (c) Ian F. Darwin 1986, 1987, 1989, 1990, 1991, 1992, 1994, 1995.
# Software written by Ian F. Darwin and others;
# maintained 1994- Christos Zoulas.
#
# This software is not subject to any export provision of the United States
# Department of Commerce, and may be exported to any country or planet.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# 1. Redistributions of source code must retain the above copyright
# notice immediately at the beginning of the file, without modification,
# this list of conditions, and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
# SUCH DAMAGE.

208
magic/animation Normal file
View file

@ -0,0 +1,208 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File: animation,v 1.47 2013/02/06 14:18:52 christos Exp $
# animation: file(1) magic for animation/movie formats
#
# animation formats
# MPEG, FLI, DL originally from vax@ccwf.cc.utexas.edu (VaX#n8)
# FLC, SGI, Apple originally from Daniel Quinlan (quinlan@yggdrasil.com)
# SGI and Apple formats
0 string MOVI Silicon Graphics movie file
!:mime video/x-sgi-movie
4 string moov Apple QuickTime
!:mime video/quicktime
4 string mdat Apple QuickTime movie (unoptimized)
!:mime video/quicktime
#4 string wide Apple QuickTime movie (unoptimized)
#!:mime video/quicktime
#4 string skip Apple QuickTime movie (modified)
#!:mime video/quicktime
#4 string free Apple QuickTime movie (modified)
#!:mime video/quicktime
4 string idsc Apple QuickTime image (fast start)
!:mime image/x-quicktime
#4 string idat Apple QuickTime image (unoptimized)
#!:mime image/x-quicktime
4 string pckg Apple QuickTime compressed archive
!:mime application/x-quicktime-player
4 string/W jP JPEG 2000 image
!:mime image/jp2
4 string ftyp ISO Media
>8 string isom \b, MPEG v4 system, version 1
!:mime video/mp4
>8 string mp41 \b, MPEG v4 system, version 1
!:mime video/mp4
>8 string mp42 \b, MPEG v4 system, version 2
!:mime video/mp4
>8 string/W jp2 \b, JPEG 2000
!:mime image/jp2
>8 string 3ge \b, MPEG v4 system, 3GPP
!:mime video/3gpp
>8 string 3gg \b, MPEG v4 system, 3GPP
!:mime video/3gpp
>8 string 3gp \b, MPEG v4 system, 3GPP
!:mime video/3gpp
>8 string 3gs \b, MPEG v4 system, 3GPP
!:mime video/3gpp
>8 string 3g2 \b, MPEG v4 system, 3GPP2
!:mime video/3gpp2
>8 string mmp4 \b, MPEG v4 system, 3GPP Mobile
!:mime video/mp4
>8 string avc1 \b, MPEG v4 system, 3GPP JVT AVC
!:mime video/3gpp
>8 string/W M4A \b, MPEG v4 system, iTunes AAC-LC
!:mime audio/mp4
>8 string/W M4V \b, MPEG v4 system, iTunes AVC-LC
!:mime video/mp4
>8 string/W qt \b, Apple QuickTime movie
!:mime video/quicktime
# MPEG sequences
# Scans for all common MPEG header start codes
0 belong&0xFFFFFF00 0x00000100
>3 byte 0xBA MPEG sequence
!:mime video/mpeg
# GRR too general as it catches also FoxPro Memo example NG.FPT
>3 byte 0xB0 MPEG sequence, v4
!:mime video/mpeg4-generic
>3 byte 0xB5 MPEG sequence, v4
!:mime video/mpeg4-generic
>3 byte 0xB3 MPEG sequence
!:mime video/mpeg
# MPEG ADTS Audio (*.mpx/mxa/aac)
# from dreesen@math.fu-berlin.de
# modified to fully support MPEG ADTS
# MP3, M1A
# modified by Joerg Jenderek
# GRR the original test are too common for many DOS files
# so don't accept as MP3 until we've tested the rate
0 beshort&0xFFFE 0xFFFA
# rates
>2 byte&0xF0 0x10 MPEG ADTS, layer III, v1, 32 kbps
!:mime audio/mpeg
>2 byte&0xF0 0x20 MPEG ADTS, layer III, v1, 40 kbps
!:mime audio/mpeg
>2 byte&0xF0 0x30 MPEG ADTS, layer III, v1, 48 kbps
!:mime audio/mpeg
>2 byte&0xF0 0x40 MPEG ADTS, layer III, v1, 56 kbps
!:mime audio/mpeg
>2 byte&0xF0 0x50 MPEG ADTS, layer III, v1, 64 kbps
!:mime audio/mpeg
>2 byte&0xF0 0x60 MPEG ADTS, layer III, v1, 80 kbps
!:mime audio/mpeg
>2 byte&0xF0 0x70 MPEG ADTS, layer III, v1, 96 kbps
!:mime audio/mpeg
>2 byte&0xF0 0x80 MPEG ADTS, layer III, v1, 112 kbps
!:mime audio/mpeg
>2 byte&0xF0 0x90 MPEG ADTS, layer III, v1, 128 kbps
!:mime audio/mpeg
>2 byte&0xF0 0xA0 MPEG ADTS, layer III, v1, 160 kbps
!:mime audio/mpeg
>2 byte&0xF0 0xB0 MPEG ADTS, layer III, v1, 192 kbps
!:mime audio/mpeg
>2 byte&0xF0 0xC0 MPEG ADTS, layer III, v1, 224 kbps
!:mime audio/mpeg
>2 byte&0xF0 0xD0 MPEG ADTS, layer III, v1, 256 kbps
!:mime audio/mpeg
>2 byte&0xF0 0xE0 MPEG ADTS, layer III, v1, 320 kbps
!:mime audio/mpeg
# MP2, M1A
0 beshort&0xFFFE 0xFFFC MPEG ADTS, layer II, v1
!:mime audio/mpeg
# MP3, M2A
0 beshort&0xFFFE 0xFFF2 MPEG ADTS, layer III, v2
!:mime audio/mpeg
# MPA, M2A
0 beshort&0xFFFE 0xFFF6 MPEG ADTS, layer I, v2
!:mime audio/mpeg
# MP3, M25A
0 beshort&0xFFFE 0xFFE2 MPEG ADTS, layer III, v2.5
!:mime audio/mpeg
# Stored AAC streams (instead of the MP4 format)
0 string ADIF MPEG ADIF, AAC
!:mime audio/x-hx-aac-adif
# Live or stored single AAC stream (used with MPEG-2 systems)
0 beshort&0xFFF6 0xFFF0 MPEG ADTS, AAC
!:mime audio/x-hx-aac-adts
# Live MPEG-4 audio streams (instead of RTP FlexMux)
0 beshort&0xFFE0 0x56E0 MPEG-4 LOAS
!:mime audio/x-mp4a-latm
# This magic isn't strong enough (matches plausible ISO-8859-1 text)
#0 beshort 0x4DE1 MPEG-4 LO-EP audio stream
#!:mime audio/x-mp4a-latm
# Summary: FLI animation format
# Created by: Daniel Quinlan <quinlan@yggdrasil.com>
# Modified by (1): Abel Cheung <abelcheung@gmail.com> (avoid over-generic detection)
4 leshort 0xAF11
# standard FLI always has 320x200 resolution and 8 bit color
>8 leshort 320
>>10 leshort 200
>>>12 leshort 8 FLI animation, 320x200x8
!:mime video/x-fli
# Summary: FLC animation format
# Created by: Daniel Quinlan <quinlan@yggdrasil.com>
# Modified by (1): Abel Cheung <abelcheung@gmail.com> (avoid over-generic detection)
4 leshort 0xAF12
# standard FLC always use 8 bit color
>12 leshort 8 FLC animation
!:mime video/x-flc
# Microsoft Advanced Streaming Format (ASF) <mpruett@sgi.com>
0 belong 0x3026b275 Microsoft ASF
!:mime video/x-ms-asf
# MNG Video Format, <URL:http://www.libpng.org/pub/mng/spec/>
0 string \x8aMNG MNG video data,
!:mime video/x-mng
# JNG Video Format, <URL:http://www.libpng.org/pub/mng/spec/>
0 string \x8bJNG JNG video data,
!:mime video/x-jng
# VRML (Virtual Reality Modelling Language)
0 string/w #VRML\ V1.0\ ascii VRML 1 file
!:mime model/vrml
0 string/w #VRML\ V2.0\ utf8 ISO/IEC 14772 VRML 97 file
!:mime model/vrml
# X3D (Extensible 3D) [http://www.web3d.org/specifications/x3d-3.0.dtd]
# From Michel Briand <michelbriand@free.fr>
0 string/t \<?xml\ version="
!:strength +1
>20 search/1000/cw \<!DOCTYPE\ X3D X3D (Extensible 3D) model xml text
!:mime model/x3d
# MPEG file
# MPEG sequences
# FIXME: This section is from the old magic.mime file and needs integrating with the rest
0 belong 0x000001BA
>4 byte &0x40
!:mime video/mp2p
>4 byte ^0x40
!:mime video/mpeg
0 belong 0x000001BB
!:mime video/mpeg
0 belong 0x000001B0
!:mime video/mp4v-es
0 belong 0x000001B5
!:mime video/mp4v-es
0 belong 0x000001B3
!:mime video/mpv
0 belong&0xFF5FFF1F 0x47400010
!:mime video/mp2t
0 belong 0x00000001
>4 byte&0x1F 0x07
!:mime video/h264

242
magic/archive Normal file
View file

@ -0,0 +1,242 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File: archive,v 1.78 2013/02/06 14:18:52 christos Exp $
# archive: file(1) magic for archive formats (see also "msdos" for self-
# extracting compressed archives)
#
# cpio, ar, arc, arj, hpack, lha/lharc, rar, squish, uc2, zip, zoo, etc.
# pre-POSIX "tar" archives are handled in the C code.
# POSIX tar archives
257 string ustar\0 POSIX tar archive
!:mime application/x-tar # encoding: posix
257 string ustar\040\040\0 GNU tar archive
!:mime application/x-tar # encoding: gnu
# cpio archives
#
# Yes, the top two "cpio archive" formats *are* supposed to just be "short".
# The idea is to indicate archives produced on machines with the same
# byte order as the machine running "file" with "cpio archive", and
# to indicate archives produced on machines with the opposite byte order
# from the machine running "file" with "byte-swapped cpio archive".
#
# The SVR4 "cpio(4)" hints that there are additional formats, but they
# are defined as "short"s; I think all the new formats are
# character-header formats and thus are strings, not numbers.
0 short 070707 cpio archive
!:mime application/x-cpio
0 short 0143561 byte-swapped cpio archive
!:mime application/x-cpio # encoding: swapped
#
# System V Release 1 portable(?) archive format.
#
0 string =<ar> System V Release 1 ar archive
!:mime application/x-archive
#
# Debian package; it's in the portable archive format, and needs to go
# before the entry for regular portable archives, as it's recognized as
# a portable archive whose first member has a name beginning with
# "debian".
#
0 string =!<arch>\ndebian
!:mime application/x-debian-package
#
# MIPS archive; they're in the portable archive format, and need to go
# before the entry for regular portable archives, as it's recognized as
# a portable archive whose first member has a name beginning with
# "__________E".
#
0 string =!<arch>\n__________E MIPS archive
!:mime application/x-archive
#
# BSD/SVR2-and-later portable archive formats.
#
0 string =!<arch> current ar archive
!:mime application/x-archive
# ARC archiver, from Daniel Quinlan (quinlan@yggdrasil.com)
#
# The first byte is the magic (0x1a), byte 2 is the compression type for
# the first file (0x01 through 0x09), and bytes 3 to 15 are the MS-DOS
# filename of the first file (null terminated). Since some types collide
# we only test some types on basis of frequency: 0x08 (83%), 0x09 (5%),
# 0x02 (5%), 0x03 (3%), 0x04 (2%), 0x06 (2%). 0x01 collides with terminfo.
0 lelong&0x8080ffff 0x0000081a ARC archive data, dynamic LZW
!:mime application/x-arc
0 lelong&0x8080ffff 0x0000091a ARC archive data, squashed
!:mime application/x-arc
0 lelong&0x8080ffff 0x0000021a ARC archive data, uncompressed
!:mime application/x-arc
0 lelong&0x8080ffff 0x0000031a ARC archive data, packed
!:mime application/x-arc
0 lelong&0x8080ffff 0x0000041a ARC archive data, squeezed
!:mime application/x-arc
0 lelong&0x8080ffff 0x0000061a ARC archive data, crunched
!:mime application/x-arc
# [JW] stuff taken from idarc, obviously ARC successors:
0 lelong&0x8080ffff 0x00000a1a PAK archive data
!:mime application/x-arc
0 lelong&0x8080ffff 0x0000141a ARC+ archive data
!:mime application/x-arc
0 lelong&0x8080ffff 0x0000481a HYP archive data
!:mime application/x-arc
# ARJ archiver (jason@jarthur.Claremont.EDU)
0 leshort 0xea60 ARJ archive data
!:mime application/x-arj
# LHARC/LHA archiver (Greg Roelofs, newt@uchicago.edu)
2 string -lh0- LHarc 1.x/ARX archive data [lh0]
!:mime application/x-lharc
2 string -lh1- LHarc 1.x/ARX archive data [lh1]
!:mime application/x-lharc
2 string -lz4- LHarc 1.x archive data [lz4]
!:mime application/x-lharc
2 string -lz5- LHarc 1.x archive data [lz5]
!:mime application/x-lharc
# [never seen any but the last; -lh4- reported in comp.compression:]
2 string -lzs- LHa/LZS archive data [lzs]
!:mime application/x-lha
2 string -lh\40- LHa 2.x? archive data [lh ]
!:mime application/x-lha
2 string -lhd- LHa 2.x? archive data [lhd]
!:mime application/x-lha
2 string -lh2- LHa 2.x? archive data [lh2]
!:mime application/x-lha
2 string -lh3- LHa 2.x? archive data [lh3]
!:mime application/x-lha
2 string -lh4- LHa (2.x) archive data [lh4]
!:mime application/x-lha
2 string -lh5- LHa (2.x) archive data [lh5]
!:mime application/x-lha
2 string -lh6- LHa (2.x) archive data [lh6]
!:mime application/x-lha
2 string -lh7- LHa (2.x)/LHark archive data [lh7]
!:mime application/x-lha
# RAR archiver (Greg Roelofs, newt@uchicago.edu)
0 string Rar! RAR archive data,
!:mime application/x-rar
# PKZIP multi-volume archive
0 string PK\x07\x08PK\x03\x04 Zip multi-volume archive data, at least PKZIP v2.50 to extract
!:mime application/zip
# Zip archives (Greg Roelofs, c/o zip-bugs@wkuvx1.wku.edu)
0 string PK\003\004
# Specialised zip formats which start with a member named 'mimetype'
# (stored uncompressed, with no 'extra field') containing the file's MIME type.
# Check for have 8-byte name, 0-byte extra field, name "mimetype", and
# contents starting with "application/":
>26 string \x8\0\0\0mimetypeapplication/
# OpenDocument formats (for OpenOffice 2.x / StarOffice >= 8)
# http://lists.oasis-open.org/archives/office/200505/msg00006.html
# (mimetype contains "application/vnd.oasis.opendocument.<SUBTYPE>")
>>50 string vnd.oasis.opendocument. OpenDocument
>>>73 string text
>>>>77 byte !0x2d Text
!:mime application/vnd.oasis.opendocument.text
>>>>77 string -template Text Template
!:mime application/vnd.oasis.opendocument.text-template
>>>>77 string -web HTML Document Template
!:mime application/vnd.oasis.opendocument.text-web
>>>>77 string -master Master Document
!:mime application/vnd.oasis.opendocument.text-master
>>>73 string graphics
>>>>81 byte !0x2d Drawing
!:mime application/vnd.oasis.opendocument.graphics
>>>>81 string -template Template
!:mime application/vnd.oasis.opendocument.graphics-template
>>>73 string presentation
>>>>85 byte !0x2d Presentation
!:mime application/vnd.oasis.opendocument.presentation
>>>>85 string -template Template
!:mime application/vnd.oasis.opendocument.presentation-template
>>>73 string spreadsheet
>>>>84 byte !0x2d Spreadsheet
!:mime application/vnd.oasis.opendocument.spreadsheet
>>>>84 string -template Template
!:mime application/vnd.oasis.opendocument.spreadsheet-template
>>>73 string chart
>>>>78 byte !0x2d Chart
!:mime application/vnd.oasis.opendocument.chart
>>>>78 string -template Template
!:mime application/vnd.oasis.opendocument.chart-template
>>>73 string formula
>>>>80 byte !0x2d Formula
!:mime application/vnd.oasis.opendocument.formula
>>>>80 string -template Template
!:mime application/vnd.oasis.opendocument.formula-template
>>>73 string database Database
!:mime application/vnd.oasis.opendocument.database
>>>73 string image
>>>>78 byte !0x2d Image
!:mime application/vnd.oasis.opendocument.image
>>>>78 string -template Template
!:mime application/vnd.oasis.opendocument.image-template
# EPUB (OEBPS) books using OCF (OEBPS Container Format)
# http://www.idpf.org/ocf/ocf1.0/download/ocf10.htm, section 4.
# From: Ralf Brown <ralf.brown@gmail.com>
>0x1E string mimetypeapplication/epub+zip EPUB document
!:mime application/epub+zip
# Catch other ZIP-with-mimetype formats
# In a ZIP file, the bytes immediately after a member's contents are
# always "PK". The 2 regex rules here print the "mimetype" member's
# contents up to the first 'P'. Luckily, most MIME types don't contain
# any capital 'P's. This is a kludge.
# (mimetype contains "application/<OTHER>")
>>50 string !epub+zip
>>>50 string !vnd.oasis.opendocument.
>>>>50 string !vnd.sun.xml.
>>>>>50 string !vnd.kde.
>>>>>>38 regex [!-OQ-~]+ Zip data (MIME type "%s"?)
!:mime application/zip
# (mimetype contents other than "application/*")
>26 string \x8\0\0\0mimetype
>>38 string !application/
>>>38 regex [!-OQ-~]+ Zip data (MIME type "%s"?)
!:mime application/zip
# Java Jar files
>(26.s+30) leshort 0xcafe Java Jar file data (zip)
!:mime application/jar
# Generic zip archives (Greg Roelofs, c/o zip-bugs@wkuvx1.wku.edu)
# Next line excludes specialized formats:
>(26.s+30) leshort !0xcafe
>>26 string !\x8\0\0\0mimetype Zip archive data
!:mime application/zip
# Zoo archiver
20 lelong 0xfdc4a7dc Zoo archive data
!:mime application/x-zoo
# Shell archives
10 string #\ This\ is\ a\ shell\ archive shell archive text
!:mime application/octet-stream
# Felix von Leitner <felix-file@fefe.de>
0 string d8:announce BitTorrent file
!:mime application/x-bittorrent
# EET archive
# From: Tilman Sauerbeck <tilman@code-monkey.de>
0 belong 0x1ee7ff00 EET archive
!:mime application/x-eet
# Symbian installation files
# http://www.thouky.co.uk/software/psifs/sis.html
# http://developer.symbian.com/main/downloads/papers/SymbianOSv91/softwareinstallsis.pdf
8 lelong 0x10000419 Symbian installation file
!:mime application/vnd.symbian.install
0 lelong 0x10201A7A Symbian installation file (Symbian OS 9.x)
!:mime x-epoc/x-sisx-app

19
magic/assembler Normal file
View file

@ -0,0 +1,19 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File: assembler,v 1.3 2013/01/04 17:23:28 christos Exp $
# make: file(1) magic for assembler source
#
0 regex \^[\020\t]*\\.asciiz assembler source text
!:mime text/x-asm
0 regex \^[\020\t]*\\.byte assembler source text
!:mime text/x-asm
0 regex \^[\020\t]*\\.even assembler source text
!:mime text/x-asm
0 regex \^[\020\t]*\\.globl assembler source text
!:mime text/x-asm
0 regex \^[\020\t]*\\.text assembler source text
!:mime text/x-asm
0 regex \^[\020\t]*\\.file assembler source text
!:mime text/x-asm
0 regex \^[\020\t]*\\.type assembler source text
!:mime text/x-asm

149
magic/audio Normal file
View file

@ -0,0 +1,149 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File: audio,v 1.65 2012/10/31 13:38:40 christos Exp $
# audio: file(1) magic for sound formats (see also "iff")
#
# Jan Nicolai Langfeldt (janl@ifi.uio.no), Dan Quinlan (quinlan@yggdrasil.com),
# and others
#
# Sun/NeXT audio data
0 string .snd Sun/NeXT audio data:
>12 belong 1 8-bit ISDN mu-law,
!:mime audio/basic
>12 belong 2 8-bit linear PCM [REF-PCM],
!:mime audio/basic
>12 belong 3 16-bit linear PCM,
!:mime audio/basic
>12 belong 4 24-bit linear PCM,
!:mime audio/basic
>12 belong 5 32-bit linear PCM,
!:mime audio/basic
>12 belong 6 32-bit IEEE floating point,
!:mime audio/basic
>12 belong 7 64-bit IEEE floating point,
!:mime audio/basic
>12 belong 23 8-bit ISDN mu-law compressed (CCITT G.721 ADPCM voice enc.),
!:mime audio/x-adpcm
# DEC systems (e.g. DECstation 5000) use a variant of the Sun/NeXT format
# that uses little-endian encoding and has a different magic number
0 lelong 0x0064732E DEC audio data:
>12 lelong 1 8-bit ISDN mu-law,
!:mime audio/x-dec-basic
>12 lelong 2 8-bit linear PCM [REF-PCM],
!:mime audio/x-dec-basic
>12 lelong 3 16-bit linear PCM,
!:mime audio/x-dec-basic
>12 lelong 4 24-bit linear PCM,
!:mime audio/x-dec-basic
>12 lelong 5 32-bit linear PCM,
!:mime audio/x-dec-basic
>12 lelong 6 32-bit IEEE floating point,
!:mime audio/x-dec-basic
>12 lelong 7 64-bit IEEE floating point,
!:mime audio/x-dec-basic
>12 lelong 23 8-bit ISDN mu-law compressed (CCITT G.721 ADPCM voice enc.),
!:mime audio/x-dec-basic
# Creative Labs AUDIO stuff
0 string MThd Standard MIDI data
!:mime audio/midi
0 string CTMF Creative Music (CMF) data
!:mime audio/x-unknown
0 string SBI SoundBlaster instrument data
!:mime audio/x-unknown
0 string Creative\ Voice\ File Creative Labs voice data
!:mime audio/x-unknown
# Real Audio (Magic .ra\0375)
0 belong 0x2e7261fd RealAudio sound file
!:mime audio/x-pn-realaudio
0 string .RMF\0\0\0 RealMedia file
!:mime application/vnd.rn-realmedia
# mime types according to http://www.geocities.com/nevilo/mod.htm:
# audio/it .it
# audio/x-zipped-it .itz
# audio/xm fasttracker modules
# audio/x-s3m screamtracker modules
# audio/s3m screamtracker modules
# audio/x-zipped-mod mdz
# audio/mod mod
# audio/x-mod All modules (mod, s3m, 669, mtm, med, xm, it, mdz, stm, itz, xmz, s3z)
#
# Taken from loader code from mikmod version 2.14
# by Steve McIntyre (stevem@chiark.greenend.org.uk)
# <doj@cubic.org> added title printing on 2003-06-24
0 string MAS_UTrack_V00
>14 string >/0 ultratracker V1.%.1s module sound data
!:mime audio/x-mod
#audio/x-tracker-module
0 string Extended\ Module: Fasttracker II module sound data
!:mime audio/x-mod
#audio/x-tracker-module
21 string/c =!SCREAM! Screamtracker 2 module sound data
!:mime audio/x-mod
#audio/x-screamtracker-module
21 string BMOD2STM Screamtracker 2 module sound data
!:mime audio/x-mod
#audio/x-screamtracker-module
1080 string M.K. 4-channel Protracker module sound data
!:mime audio/x-mod
#audio/x-protracker-module
1080 string M!K! 4-channel Protracker module sound data
!:mime audio/x-mod
#audio/x-protracker-module
1080 string FLT4 4-channel Startracker module sound data
!:mime audio/x-mod
#audio/x-startracker-module
1080 string FLT8 8-channel Startracker module sound data
!:mime audio/x-mod
#audio/x-startracker-module
1080 string 4CHN 4-channel Fasttracker module sound data
!:mime audio/x-mod
#audio/x-fasttracker-module
1080 string 6CHN 6-channel Fasttracker module sound data
!:mime audio/x-mod
#audio/x-fasttracker-module
1080 string 8CHN 8-channel Fasttracker module sound data
!:mime audio/x-mod
#audio/x-fasttracker-module
1080 string CD81 8-channel Octalyser module sound data
!:mime audio/x-mod
#audio/x-octalysertracker-module
1080 string OKTA 8-channel Octalyzer module sound data
!:mime audio/x-mod
#audio/x-octalysertracker-module
# Not good enough.
#1082 string CH
#>1080 string >/0 %.2s-channel Fasttracker "oktalyzer" module sound data
1080 string 16CN 16-channel Taketracker module sound data
!:mime audio/x-mod
#audio/x-taketracker-module
1080 string 32CN 32-channel Taketracker module sound data
!:mime audio/x-mod
#audio/x-taketracker-module
# Impulse tracker module (audio/x-it)
0 string IMPM Impulse Tracker module sound data -
!:mime audio/x-mod
# Free lossless audio codec <http://flac.sourceforge.net>
# From: Przemyslaw Augustyniak <silvathraec@rpg.pl>
0 string fLaC FLAC audio bitstream data
!:mime audio/x-flac
# Monkey's Audio compressed audio format (.ape)
# From danny.milo@gmx.net (Danny Milosavljevic)
# New version from Abel Cheung <abel (@) oaka.org>
0 string MAC\040 Monkey's Audio compressed format
!:mime audio/x-ape
# musepak support From: "Jiri Pejchal" <jiri.pejchal@gmail.com>
0 string MP+ Musepack audio
!:mime audio/x-musepack

47
magic/c-lang Normal file
View file

@ -0,0 +1,47 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File: c-lang,v 1.16 2011/12/09 08:02:16 rrt Exp $
# c-lang: file(1) magic for C and related languages programs
#
# BCPL
0 search/8192 "libhdr" BCPL source text
!:mime text/x-bcpl
0 search/8192 "LIBHDR" BCPL source text
!:mime text/x-bcpl
# C
0 regex \^#include C source text
!:mime text/x-c
0 regex \^char C source text
!:mime text/x-c
0 regex \^double C source text
!:mime text/x-c
0 regex \^extern C source text
!:mime text/x-c
0 regex \^float C source text
!:mime text/x-c
0 regex \^struct C source text
!:mime text/x-c
0 regex \^union C source text
!:mime text/x-c
0 search/8192 main( C source text
!:mime text/x-c
# C++
# The strength of these rules is increased so they beat the C rules above
0 regex \^template C++ source text
!:strength + 5
!:mime text/x-c++
0 regex \^virtual C++ source text
!:strength + 5
!:mime text/x-c++
0 regex \^class C++ source text
!:strength + 5
!:mime text/x-c++
0 regex \^public: C++ source text
!:strength + 5
!:mime text/x-c++
0 regex \^private: C++ source text
!:strength + 5
!:mime text/x-c++

31
magic/cafebabe Normal file
View file

@ -0,0 +1,31 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File: cafebabe,v 1.13 2013/02/26 21:04:38 christos Exp $
# Cafe Babes unite!
#
# Since Java bytecode and Mach-O universal binaries have the same magic number,
# the test must be performed in the same "magic" sequence to get both right.
# The long at offset 4 in a Mach-O universal binary tells the number of
# architectures; the short at offset 4 in a Java bytecode file is the JVM minor
# version and the short at offset 6 is the JVM major version. Since there are only
# only 18 labeled Mach-O architectures at current, and the first released
# Java class format was version 43.0, we can safely choose any number
# between 18 and 39 to test the number of architectures against
# (and use as a hack). Let's not use 18, because the Mach-O people
# might add another one or two as time goes by...
#
### JAVA START ###
0 belong 0xcafebabe
!:mime application/x-java-applet
0 belong 0xcafed00d JAR compressed with pack200,
>5 byte x version %d.
>4 byte x \b%d
!:mime application/x-java-pack200
0 belong 0xcafed00d JAR compressed with pack200,
>5 byte x version %d.
>4 byte x \b%d
!:mime application/x-java-pack200
### JAVA END ###

82
magic/commands Normal file
View file

@ -0,0 +1,82 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File: commands,v 1.44 2013/02/05 15:20:47 christos Exp $
# commands: file(1) magic for various shells and interpreters
#
#0 string/w : shell archive or script for antique kernel text
0 string/wt #!\ /bin/sh POSIX shell script text executable
!:mime text/x-shellscript
0 string/wt #!\ /bin/csh C shell script text executable
!:mime text/x-shellscript
# korn shell magic, sent by George Wu, gwu@clyde.att.com
0 string/wt #!\ /bin/ksh Korn shell script text executable
!:mime text/x-shellscript
0 string/wt #!\ /bin/tcsh Tenex C shell script text executable
!:mime text/x-shellscript
0 string/wt #!\ /usr/bin/tcsh Tenex C shell script text executable
!:mime text/x-shellscript
0 string/wt #!\ /usr/local/tcsh Tenex C shell script text executable
!:mime text/x-shellscript
0 string/wt #!\ /usr/local/bin/tcsh Tenex C shell script text executable
!:mime text/x-shellscript
#
# zsh/ash/ae/nawk/gawk magic from cameron@cs.unsw.oz.au (Cameron Simpson)
0 string/wt #!\ /bin/zsh Paul Falstad's zsh script text executable
!:mime text/x-shellscript
0 string/wt #!\ /usr/bin/zsh Paul Falstad's zsh script text executable
!:mime text/x-shellscript
0 string/wt #!\ /usr/local/bin/zsh Paul Falstad's zsh script text executable
!:mime text/x-shellscript
0 string/wt #!\ /usr/local/bin/ash Neil Brown's ash script text executable
!:mime text/x-shellscript
0 string/wt #!\ /usr/local/bin/ae Neil Brown's ae script text executable
!:mime text/x-shellscript
0 string/wt #!\ /bin/nawk new awk script text executable
!:mime text/x-nawk
0 string/wt #!\ /usr/bin/nawk new awk script text executable
!:mime text/x-nawk
0 string/wt #!\ /usr/local/bin/nawk new awk script text executable
!:mime text/x-nawk
0 string/wt #!\ /bin/gawk GNU awk script text executable
!:mime text/x-gawk
0 string/wt #!\ /usr/bin/gawk GNU awk script text executable
!:mime text/x-gawk
0 string/wt #!\ /usr/local/bin/gawk GNU awk script text executable
!:mime text/x-gawk
#
0 string/wt #!\ /bin/awk awk script text executable
!:mime text/x-awk
0 string/wt #!\ /usr/bin/awk awk script text executable
!:mime text/x-awk
# bash shell magic, from Peter Tobias (tobias@server.et-inf.fho-emden.de)
0 string/wt #!\ /bin/bash Bourne-Again shell script text executable
!:mime text/x-shellscript
0 string/wt #!\ /usr/bin/bash Bourne-Again shell script text executable
!:mime text/x-shellscript
0 string/wt #!\ /usr/local/bash Bourne-Again shell script text executable
!:mime text/x-shellscript
0 string/wt #!\ /usr/local/bin/bash Bourne-Again shell script text executable
!:mime text/x-shellscript
# PHP scripts
# Ulf Harnhammar <ulfh@update.uu.se>
0 search/1/c =<?php PHP script text
!:strength + 10
!:mime text/x-php
0 search/1 =<?\n PHP script text
!:mime text/x-php
0 search/1 =<?\r PHP script text
!:mime text/x-php
0 search/1/w #!\ /usr/local/bin/php PHP script text executable
!:strength + 10
!:mime text/x-php
0 search/1/w #!\ /usr/bin/php PHP script text executable
!:strength + 10
!:mime text/x-php
# Smarty compiled template, http://www.smarty.net/
# Elan Ruusamae <glen@delfi.ee>
0 string =<?php\ /*\ Smarty\ version Smarty compiled template
>24 regex [0-9.]+ \b, version %s
!:mime text/x-php

77
magic/compress Normal file
View file

@ -0,0 +1,77 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File: compress,v 1.48 2011/12/07 18:39:43 christos Exp $
# compress: file(1) magic for pure-compression formats (no archives)
#
# compress, gzip, pack, compact, huf, squeeze, crunch, freeze, yabba, etc.
#
# Formats for various forms of compressed data
# Formats for "compress" proper have been moved into "compress.c",
# because it tries to uncompress it to figure out what's inside.
# standard unix compress
0 string \037\235 compress'd data
!:mime application/x-compress
!:apple LZIVZIVU
# gzip (GNU zip, not to be confused with Info-ZIP or PKWARE zip archiver)
# Edited by Chris Chittleborough <cchittleborough@yahoo.com.au>, March 2002
# * Original filename is only at offset 10 if "extra field" absent
# * Produce shorter output - notably, only report compression methods
# other than 8 ("deflate", the only method defined in RFC 1952).
0 string \037\213 gzip compressed data
!:mime application/x-gzip
# packed data, Huffman (minimum redundancy) codes on a byte-by-byte basis
0 string \037\036 packed data
!:mime application/octet-stream
#
# This magic number is byte-order-independent.
0 short 0x1f1f old packed data
!:mime application/octet-stream
# XXX - why *two* entries for "compacted data", one of which is
# byte-order independent, and one of which is byte-order dependent?
#
0 short 0x1fff compacted data
!:mime application/octet-stream
# This string is valid for SunOS (BE) and a matching "short" is listed
# in the Ultrix (LE) magic file.
0 string \377\037 compacted data
!:mime application/octet-stream
0 short 0145405 huf output
!:mime application/octet-stream
# bzip2
0 string BZh bzip2 compressed data
!:mime application/x-bzip2
# lzip
0 string LZIP lzip compressed data
!:mime application/x-lzip
# 7-zip archiver, from Thomas Klausner (wiz@danbala.tuwien.ac.at)
# http://www.7-zip.org or DOC/7zFormat.txt
#
0 string 7z\274\257\047\034 7-zip archive data,
>6 byte x version %d
>7 byte x \b.%d
!:mime application/x-7z-compressed
# Type: LZMA
0 lelong&0xffffff =0x5d
>12 leshort =0xff LZMA compressed data,
>>5 lequad =0xffffffffffffffff streamed
>>5 lequad !0xffffffffffffffff non-streamed, size %lld
!:mime application/x-lzma
# http://tukaani.org/xz/xz-file-format.txt
0 ustring \xFD7zXZ\x00 XZ compressed data
!:mime application/x-xz
# https://github.com/ckolivas/lrzip/blob/master/doc/magic.header.txt
0 string LRZI LRZIP compressed data
>4 byte x - version %d
>5 byte x \b.%d
!:mime application/x-lrzip

47
magic/database Normal file
View file

@ -0,0 +1,47 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File: database,v 1.32 2013/02/06 14:18:52 christos Exp $
# database: file(1) magic for various databases
#
# extracted from header/code files by Graeme Wilford (eep2gw@ee.surrey.ac.uk)
#
#
# GDBM magic numbers
# Will be maintained as part of the GDBM distribution in the future.
# <downsj@teeny.org>
0 belong 0x13579ace GNU dbm 1.x or ndbm database, big endian
!:mime application/x-gdbm
0 lelong 0x13579ace GNU dbm 1.x or ndbm database, little endian
!:mime application/x-gdbm
0 string GDBM GNU dbm 2.x database
!:mime application/x-gdbm
#
# Berkeley DB
#
# Ian Darwin's file /etc/magic files: big/little-endian version.
#
# Hash 1.85/1.86 databases store metadata in network byte order.
# Btree 1.85/1.86 databases store the metadata in host byte order.
# Hash and Btree 2.X and later databases store the metadata in host byte order.
0 long 0x00061561 Berkeley DB
!:mime application/x-dbm
# MS Access database
4 string Standard\ Jet\ DB Microsoft Access Database
!:mime application/x-msaccess
4 string Standard\ ACE\ DB Microsoft Access Database
!:mime application/x-msaccess
# Tokyo Cabinet magic data
# http://tokyocabinet.sourceforge.net/index.html
0 string ToKyO\ CaBiNeT\n Tokyo Cabinet
>14 string x \b (%s)
>32 byte 0 \b, Hash
!:mime application/x-tokyocabinet-hash
>32 byte 1 \b, B+ tree
!:mime application/x-tokyocabinet-btree
>32 byte 2 \b, Fixed-length
!:mime application/x-tokyocabinet-fixed
>32 byte 3 \b, Table
!:mime application/x-tokyocabinet-table

25
magic/diff Normal file
View file

@ -0,0 +1,25 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File: diff,v 1.13 2012/06/16 14:43:36 christos Exp $
# diff: file(1) magic for diff(1) output
#
0 search/1 diff\ diff output text
!:mime text/x-diff
0 search/1 ***\ diff output text
!:mime text/x-diff
0 search/1 Only\ in\ diff output text
!:mime text/x-diff
0 search/1 Common\ subdirectories:\ diff output text
!:mime text/x-diff
0 search/1 Index: RCS/CVS diff output text
!:mime text/x-diff
# unified diff
0 search/4096 ---\
>&0 search/1024 \n
>>&0 search/1 +++\
>>>&0 search/1024 \n
>>>>&0 search/1 @@ unified diff output text
!:mime text/x-diff
!:strength + 90

43
magic/elf Normal file
View file

@ -0,0 +1,43 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# elf: file(1) magic for ELF executables
#
# We have to check the byte order flag to see what byte order all the
# other stuff in the header is in.
#
# What're the correct byte orders for the nCUBE and the Fujitsu VPP500?
#
# Created by: unknown
# Modified by (1): Daniel Quinlan <quinlan@yggdrasil.com>
# Modified by (2): Peter Tobias <tobias@server.et-inf.fho-emden.de> (core support)
# Modified by (3): Christian 'Dr. Disk' Hechelmann <drdisk@ds9.au.s.shuttle.de> (fix of core support)
# Modified by (4): <gerardo.cacciari@gmail.com> (VMS Itanium)
# Modified by (5): Matthias Urlichs <smurf@debian.org> (Listing of many architectures)
0 string \177ELF ELF
>4 byte 0 invalid class
>4 byte 1 32-bit
>4 byte 2 64-bit
>5 byte 0 invalid byte order
>5 byte 1 LSB
>>16 leshort 0 no file type,
!:strength *2
!:mime application/octet-stream
>>16 leshort 1 relocatable,
!:mime application/x-object
>>16 leshort 2 executable,
!:mime application/x-executable
>>16 leshort 3 shared object,
!:mime application/x-sharedlib
>>16 leshort 4 core file
!:mime application/x-coredump
>5 byte 2 MSB
>>16 beshort 0 no file type,
!:mime application/octet-stream
>>16 beshort 1 relocatable,
!:mime application/x-object
>>16 beshort 2 executable,
!:mime application/x-executable
>>16 beshort 3 shared object,
!:mime application/x-sharedlib
>>16 beshort 4 core file,
!:mime application/x-coredump

34
magic/epoc Normal file
View file

@ -0,0 +1,34 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File: epoc,v 1.7 2009/09/19 16:28:09 christos Exp $
# EPOC : file(1) magic for EPOC documents [Psion Series 5/Osaris/Geofox 1]
# Stefan Praszalowicz <hpicollo@worldnet.fr> and Peter Breitenlohner <peb@mppmu.mpg.de>
# Useful information for improving this file can be found at:
# http://software.frodo.looijaard.name/psiconv/formats/Index.html
#------------------------------------------------------------------------------
0 lelong 0x10000037 Psion Series 5
>4 lelong 0x10000042 multi-bitmap image
!:mime image/x-epoc-mbm
>4 lelong 0x1000006D
>>8 lelong 0x1000007D Sketch image
!:mime image/x-epoc-sketch
>>8 lelong 0x1000007F Word file
!:mime application/x-epoc-word
>>8 lelong 0x10000085 OPL program (TextEd)
!:mime application/x-epoc-opl
>>8 lelong 0x10000088 Sheet file
!:mime application/x-epoc-sheet
>4 lelong 0x10000073 OPO module
!:mime application/x-epoc-opo
>4 lelong 0x10000074 OPL application
!:mime application/x-epoc-app
0 lelong 0x10000050 Psion Series 5
>4 lelong 0x1000006D database
>>8 lelong 0x10000084 Agenda file
!:mime application/x-epoc-agenda
>>8 lelong 0x10000086 Data file
!:mime application/x-epoc-data
>>8 lelong 0x10000CEA Jotter file
!:mime application/x-epoc-jotter

12
magic/filesystems Normal file
View file

@ -0,0 +1,12 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File: filesystems,v 1.76 2013/02/18 18:45:41 christos Exp $
# filesystems: file(1) magic for different filesystems
#
# CDROM Filesystems
# Modified for UDF by gerardo.cacciari@gmail.com
32769 string CD001 #
!:mime application/x-iso9660-image
37633 string CD001 ISO 9660 CD-ROM filesystem data (raw 2352 byte sectors)
!:mime application/x-iso9660-image

18
magic/flash Normal file
View file

@ -0,0 +1,18 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File: flash,v 1.8 2009/09/19 16:28:09 christos Exp $
# flash: file(1) magic for Macromedia Flash file format
#
# See
#
# http://www.macromedia.com/software/flash/open/
#
0 string FWS Macromedia Flash data,
>3 byte x version %d
!:mime application/x-shockwave-flash
0 string CWS Macromedia Flash data (compressed),
!:mime application/x-shockwave-flash
# From: Cal Peake <cp@absolutedigital.net>
0 string FLV Macromedia Flash Video
!:mime video/x-flv

32
magic/fonts Normal file
View file

@ -0,0 +1,32 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File: fonts,v 1.25 2013/02/06 14:18:52 christos Exp $
# fonts: file(1) magic for font data
#
# X11 font files in SNF (Server Natural Format) format
# updated by Joerg Jenderek at Feb 2013
# http://computer-programming-forum.com/51-perl/8f22fb96d2e34bab.htm
0 belong 00000004 X11 SNF font data, MSB first
#>104 belong 00000004 X11 SNF font data, MSB first
!:mime application/x-font-sfn
# GRR: line below too general as it catches also Xbase index file t3-CHAR.NDX
0 lelong 00000004
>104 lelong 00000004 X11 SNF font data, LSB first
!:mime application/x-font-sfn
# True Type fonts
0 string \000\001\000\000\000 TrueType font data
!:mime application/x-font-ttf
# Opentype font data from Avi Bercovich
0 string OTTO OpenType font data
!:mime application/vnd.ms-opentype
# Gurkan Sengun <gurkan@linuks.mine.nu>, www.linuks.mine.nu
0 string SplineFontDB: Spline Font Database
!:mime application/vnd.font-fontforge-sfd
# EOT
34 string LP Embedded OpenType (EOT)
!:mime application/vnd.ms-fontobject

7
magic/fortran Normal file
View file

@ -0,0 +1,7 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File: fortran,v 1.6 2009/09/19 16:28:09 christos Exp $
# FORTRAN source
0 regex/100 \^[Cc][\ \t] FORTRAN program
!:mime text/x-fortran
!:strength - 5

31
magic/frame Normal file
View file

@ -0,0 +1,31 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File$
# frame: file(1) magic for FrameMaker files
#
# This stuff came on a FrameMaker demo tape, most of which is
# copyright, but this file is "published" as witness the following:
#
# Note that this is the Framemaker Maker Interchange Format, not the
# Normal format which would be application/vnd.framemaker.
#
0 string \<MakerFile FrameMaker document
!:mime application/x-mif
0 string \<MIFFile FrameMaker MIF (ASCII) file
!:mime application/x-mif
0 search/1 \<MakerDictionary FrameMaker Dictionary text
!:mime application/x-mif
0 string \<MakerScreenFont FrameMaker Font file
!:mime application/x-mif
0 string \<MML FrameMaker MML file
!:mime application/x-mif
0 string \<BookFile FrameMaker Book file
!:mime application/x-mif
# XXX - this book entry should be verified, if you find one, uncomment this
#0 string \<Book\ FrameMaker Book (ASCII) file
#!:mime application/x-mif
#>6 string 3.0 (3.0)
#>6 string 2.0 (2.0)
#>6 string 1.0 (1.0)
0 string \<Maker Intermediate Print File FrameMaker IPL file
!:mime application/x-mif

13
magic/gimp Normal file
View file

@ -0,0 +1,13 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File: gimp,v 1.6 2009/09/19 16:28:09 christos Exp $
# GIMP Gradient: file(1) magic for the GIMP's gradient data files
# by Federico Mena <federico@nuclecu.unam.mx>
#------------------------------------------------------------------------------
# XCF: file(1) magic for the XCF image format used in the GIMP developed
# by Spencer Kimball and Peter Mattis
# ('Bucky' LaDieu, nega@vt.edu)
0 string gimp\ xcf GIMP XCF image data,
!:mime image/x-xcf

23
magic/gnu Normal file
View file

@ -0,0 +1,23 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File: gnu,v 1.13 2012/01/03 17:16:54 christos Exp $
# gnu: file(1) magic for various GNU tools
#
# GNU nlsutils message catalog file format
#
# GNU message catalog (.mo and .gmo files)
# GnuPG
# The format is very similar to pgp
# Note: magic.mime had 0x8501 for the next line instead of 0x8502
0 beshort 0x8502 GPG encrypted data
!:mime text/PGP # encoding: data
# This magic is not particularly good, as the keyrings don't have true
# magic. Nevertheless, it covers many keyrings.
0 beshort 0x9901 GPG key public ring
!:mime application/x-gnupg-keyring
# gettext message catalogue
0 regex \^msgid\ GNU gettext message catalogue text
!:mime text/x-po

8
magic/gnumeric Normal file
View file

@ -0,0 +1,8 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File$
# gnumeric: file(1) magic for Gnumeric spreadsheet
# This entry is only semi-helpful, as Gnumeric compresses its files, so
# they will ordinarily reported as "compressed", but at least -z helps
39 string =<gmr:Workbook Gnumeric spreadsheet
!:mime application/x-gnumeric

51
magic/icc Normal file
View file

@ -0,0 +1,51 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File$
# icc: file(1) magic for International Color Consortium file formats
#
# Color profiles as per the ICC's "Image technology colour management -
# Architecture, profile format, and data structure" specification.
# See
#
# http://www.color.org/specification/ICC1v43_2010-12.pdf
#
# for Specification ICC.1:2010 (Profile version 4.3.0.0).
#
# Bytes 36 to 39 contain a generic profile file signature of "acsp";
# bytes 40 to 43 "may be used to identify the primary platform/operating
# system framework for which the profile was created".
#
# There are other fields that might be worth dumping as well.
#
# This appears to be what's used for Apple ColorSync profiles.
# Instead of adding that, Apple just changed the generic "acsp" entry
# to be for "ColorSync ICC Color Profile" rather than "Kodak Color
# Management System, ICC Profile".
# Yes, it's "APPL", not "AAPL"; see the spec.
36 string acspAPPL ColorSync ICC Profile
!:mime application/vnd.iccprofile
# Microsoft ICM color profile
36 string acspMSFT Microsoft ICM Color Profile
!:mime application/vnd.iccprofile
# Yes, that's a blank after "SGI".
36 string acspSGI\ SGI ICC Profile
!:mime application/vnd.iccprofile
# XXX - is this what's used for the Sun KCMS or not? The standard file
# uses just "acsp" for that, but Apple's file uses it for "ColorSync",
# and there *is* an identified "primary platform" value of SUNW.
36 string acspSUNW Sun KCMS ICC Profile
!:mime application/vnd.iccprofile
# Any other profile.
# XXX - should we use "acsp\0\0\0\0" for "no primary platform" profiles,
# and use "acsp" for everything else and dump the "primary platform"
# string in those cases?
36 string acsp ICC Profile
!:mime application/vnd.iccprofile

21
magic/iff Normal file
View file

@ -0,0 +1,21 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File: iff,v 1.12 2009/09/19 16:28:09 christos Exp $
# iff: file(1) magic for Interchange File Format (see also "audio" & "images")
#
# Daniel Quinlan (quinlan@yggdrasil.com) -- IFF was designed by Electronic
# Arts for file interchange. It has also been used by Apple, SGI, and
# especially Commodore-Amiga.
#
# IFF files begin with an 8 byte FORM header, followed by a 4 character
# FORM type, which is followed by the first chunk in the FORM.
0 string FORM IFF data
#>4 belong x \b, FORM is %d bytes long
# audio formats
>8 string AIFF \b, AIFF audio
!:mime audio/x-aiff
>8 string AIFC \b, AIFF-C compressed audio
!:mime audio/x-aiff
>8 string 8SVX \b, 8SVX 8-bit sampled sound voice
!:mime audio/x-aiff

255
magic/images Normal file
View file

@ -0,0 +1,255 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File: images,v 1.80 2013/02/06 14:18:52 christos Exp $
# images: file(1) magic for image formats (see also "iff", and "c-lang" for
# XPM bitmaps)
#
# originally from jef@helios.ee.lbl.gov (Jef Poskanzer),
# additions by janl@ifi.uio.no as well as others. Jan also suggested
# merging several one- and two-line files into here.
#
# little magic: PCX (first byte is 0x0a)
# PBMPLUS images
# The next byte following the magic is always whitespace.
# strength is changed to try these patterns before "x86 boot sector"
0 search/1 P1
>3 regex =[0-9]*\ [0-9]* Netpbm PBM image text
>3 regex =[0-9]+\ \b, size = %sx
>>3 regex =\ [0-9]+ \b%s
!:strength + 45
!:mime image/x-portable-bitmap
0 search/1 P2
>3 regex =[0-9]*\ [0-9]* Netpbm PGM image text
>3 regex =[0-9]+\ \b, size = %sx
>>3 regex =\ [0-9]+ \b%s
!:strength + 45
!:mime image/x-portable-greymap
0 search/1 P3 Netpbm PPM image text
>3 regex =[0-9]*\ [0-9]* Netpbm PPM image text
>3 regex =[0-9]+\ \b, size = %sx
>>3 regex =\ [0-9]+ \b%s
!:strength + 45
!:mime image/x-portable-pixmap
0 string P4
>3 regex =[0-9]*\ [0-9]* Netpbm PBM "rawbits" image data
>3 regex =[0-9]+\ \b, size = %sx
>>3 regex =\ [0-9]+ \b%s
!:strength + 45
!:mime image/x-portable-bitmap
0 string P5
>3 regex =[0-9]*\ [0-9]* Netpbm PGM "rawbits" image data
>3 regex =[0-9]+\ \b, size = %sx
>>3 regex =\ [0-9]+ \b%s
!:strength + 45
!:mime image/x-portable-greymap
0 string P6
>3 regex =[0-9]*\ [0-9]* Netpbm PPM "rawbits" image data
>3 regex =[0-9]+\ \b, size = %sx
>>3 regex =\ [0-9]+ \b%s
!:strength + 45
!:mime image/x-portable-pixmap
0 string P7 Netpbm PAM image file
!:mime image/x-portable-pixmap
# NIFF (Navy Interchange File Format, a modification of TIFF) images
# [GRR: this *must* go before TIFF]
0 string IIN1 NIFF image data
!:mime image/x-niff
# Canon RAW version 1 (CRW) files are a type of Canon Image File Format
# (CIFF) file. These are apparently all little-endian.
# From: Adam Buchbinder <adam.buchbinder@gmail.com>
# URL: http://www.sno.phy.queensu.ca/~phil/exiftool/canon_raw.html
0 string II\x1a\0\0\0HEAPCCDR Canon CIFF raw image data
!:mime image/x-canon-crw
# Canon RAW version 2 (CR2) files are a kind of TIFF with an extra magic
# number. Put this above the TIFF test to make sure we detect them.
# These are apparently all little-endian.
# From: Adam Buchbinder <adam.buchbinder@gmail.com>
# URL: http://libopenraw.freedesktop.org/wiki/Canon_CR2
0 string II\x2a\0\x10\0\0\0CR Canon CR2 raw image data
!:mime image/x-canon-cr2
# Tag Image File Format, from Daniel Quinlan (quinlan@yggdrasil.com)
# The second word of TIFF files is the TIFF version number, 42, which has
# never changed. The TIFF specification recommends testing for it.
0 string MM\x00\x2a TIFF image data, big-endian
!:mime image/tiff
0 string II\x2a\x00 TIFF image data, little-endian
!:mime image/tiff
0 string MM\x00\x2b Big TIFF image data, big-endian
!:mime image/tiff
0 string II\x2b\x00 Big TIFF image data, little-endian
!:mime image/tiff
# PNG [Portable Network Graphics, or "PNG's Not GIF"] images
# (Greg Roelofs, newt@uchicago.edu)
# (Albert Cahalan, acahalan@cs.uml.edu)
#
# 137 P N G \r \n ^Z \n [4-byte length] H E A D [HEAD data] [HEAD crc] ...
#
0 string \x89PNG\x0d\x0a\x1a\x0a PNG image data
!:mime image/png
# possible GIF replacements; none yet released!
# (Greg Roelofs, newt@uchicago.edu)
#
# GRR 950115: this was mine ("Zip GIF"):
0 string GIF94z ZIF image (GIF+deflate alpha)
!:mime image/x-unknown
#
# GRR 950115: this is Jeremy Wohl's Free Graphics Format (better):
#
0 string FGF95a FGF image (GIF+deflate beta)
!:mime image/x-unknown
#
# GRR 950115: this is Thomas Boutell's Portable Bitmap Format proposal
# (best; not yet implemented):
#
0 string PBF PBF image (deflate compression)
!:mime image/x-unknown
# GIF
0 string GIF8 GIF image data
!:mime image/gif
!:apple 8BIMGIFf
# From: Joerg Jenderek <joerg.jen.der.ek@gmx.net>
# most files with the extension .EPA and some with .BMP
0 string \x11\x06 Award BIOS Logo, 136 x 84
!:mime image/x-award-bioslogo
0 string \x11\x09 Award BIOS Logo, 136 x 126
!:mime image/x-award-bioslogo
#0 string \x07\x1f BIOS Logo corrupted?
# http://www.blackfiveservices.co.uk/awbmtools.shtml
# http://biosgfx.narod.ru/v3/
# http://biosgfx.narod.ru/abr-2/
0 string AWBM
>4 leshort <1981 Award BIOS bitmap
!:mime image/x-award-bmp
# PC bitmaps (OS/2, Windows BMP files) (Greg Roelofs, newt@uchicago.edu)
0 string BM
>14 leshort 12 PC bitmap, OS/2 1.x format
!:mime image/x-ms-bmp
>14 leshort 64 PC bitmap, OS/2 2.x format
!:mime image/x-ms-bmp
>14 leshort 40 PC bitmap, Windows 3.x format
!:mime image/x-ms-bmp
>14 leshort 128 PC bitmap, Windows NT/2000 format
!:mime image/x-ms-bmp
# XPM icons (Greg Roelofs, newt@uchicago.edu)
0 search/1 /*\ XPM\ */ X pixmap image text
!:mime image/x-xpmi
# DICOM medical imaging data
128 string DICM DICOM medical imaging data
!:mime application/dicom
# XWD - X Window Dump file.
# As described in /usr/X11R6/include/X11/XWDFile.h
# used by the xwd program.
# Bradford Castalia, idaeim, 1/01
# updated by Adam Buchbinder, 2/09
# The following assumes version 7 of the format; the first long is the length
# of the header, which is at least 25 4-byte longs, and the one at offset 8
# is a constant which is always either 1 or 2. Offset 12 is the pixmap depth,
# which is a maximum of 32.
0 belong >100
>8 belong <3
>>12 belong <33
>>>4 belong 7 XWD X Window Dump image data
!:mime image/x-xwindowdump
# PCX image files
# From: Dan Fandrich <dan@coneharvesters.com>
# updated by Joerg Jenderek at Feb 2013 by http://de.wikipedia.org/wiki/PCX
# http://web.archive.org/web/20100206055706/http://www.qzx.com/pc-gpe/pcx.txt
# GRR: original test was still too general as it catches xbase examples T5.DBT,T6.DBT with 0xa000000
# test for bytes 0x0a,version byte (0,2,3,4,5),compression byte flag(0,1), bit depth (>0) of PCX or T5.DBT,T6.DBT
0 ubelong&0xffF8fe00 0x0a000000
# for PCX bit depth > 0
>3 ubyte >0
# test for valid versions
>>1 ubyte <6
>>>1 ubyte !1 PCX
!:mime image/x-pcx
# Adobe Photoshop
# From: Asbjoern Sloth Toennesen <asbjorn@lila.io>
0 string 8BPS Adobe Photoshop Image
!:mime image/vnd.adobe.photoshop
# Summary: DjVu image / document
# Extension: .djvu
# Reference: http://djvu.org/docs/DjVu3Spec.djvu
# Submitted by: Stephane Loeuillet <stephane.loeuillet@tiscali.fr>
# Modified by (1): Abel Cheung <abelcheung@gmail.com>
0 string AT&TFORM
>12 string DJVM DjVu multiple page document
!:mime image/vnd.djvu
>12 string DJVU DjVu image or single page document
!:mime image/vnd.djvu
>12 string DJVI DjVu shared document
!:mime image/vnd.djvu
>12 string THUM DjVu page thumbnails
!:mime image/vnd.djvu
# Originally by Marc Espie
# Modified by Robert Minsk <robertminsk at yahoo.com>
# http://www.openexr.com/openexrfilelayout.pdf
0 lelong 20000630 OpenEXR image data,
!:mime image/x-exr
# SMPTE Digital Picture Exchange Format, SMPTE DPX
#
# ANSI/SMPTE 268M-1994, SMPTE Standard for File Format for Digital
# Moving-Picture Exchange (DPX), v1.0, 18 February 1994
# Robert Minsk <robertminsk at yahoo.com>
0 string SDPX DPX image data, big-endian,
!:mime image/x-dpx
#-----------------------------------------------------------------------
# Hierarchical Data Format, used to facilitate scientific data exchange
# specifications at http://hdf.ncsa.uiuc.edu/
0 belong 0x0e031301 Hierarchical Data Format (version 4) data
!:mime application/x-hdf
0 string \211HDF\r\n\032\n Hierarchical Data Format (version 5) data
!:mime application/x-hdf
# http://www.cartesianinc.com/Tech/
0 string CPC\262 Cartesian Perceptual Compression image
!:mime image/x-cpi
# Polar Monitor Bitmap (.pmb) used as logo for Polar Electro watches
# From: Markus Heidelberg <markus.heidelberg at web.de>
0 string/t [BitmapInfo2] Polar Monitor Bitmap text
!:mime image/x-polar-monitor-bitmap
# Type: Olympus ORF raw images.
# URL: http://libopenraw.freedesktop.org/wiki/Olympus_ORF
# From: Adam Buchbinder <adam.buchbinder@gmail.com>
0 string MMOR Olympus ORF raw image data, big-endian
!:mime image/x-olympus-orf
0 string IIRO Olympus ORF raw image data, little-endian
!:mime image/x-olympus-orf
0 string IIRS Olympus ORF raw image data, little-endian
!:mime image/x-olympus-orf
# Type: Foveon X3F
# URL: http://www.photofo.com/downloads/x3f-raw-format.pdf
# From: Adam Buchbinder <adam.buchbinder@gmail.com>
# Note that the MIME type isn't defined anywhere that I can find; if
# there's a canonical type for this format, it should replace this one.
0 string FOVb Foveon X3F raw image data
!:mime image/x-x3f
# Paint.NET file
# From Adam Buchbinder <adam.buchbinder@gmail.com>
0 string PDN3 Paint.NET image data
!:mime image/x-paintnet

16
magic/java Normal file
View file

@ -0,0 +1,16 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------
# $File: java,v 1.13 2011/12/08 12:12:46 rrt Exp $
# Java ByteCode and Mach-O binaries (e.g., Mac OS X) use the
# same magic number, 0xcafebabe, so they are both handled
# in the entry called "cafebabe".
#------------------------------------------------------------
0 belong 0xfeedfeed Java KeyStore
!:mime application/x-java-keystore
0 belong 0xcececece Java JCE KeyStore
!:mime application/x-java-jce-keystore
# Java source
0 regex ^import.*;$ Java source
!:mime text/x-java

17
magic/javascript Normal file
View file

@ -0,0 +1,17 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File: $
# javascript: magic for javascript and node.js scripts.
#
0 search/1/w #!/bin/node Node.js script text executable
!:mime application/javascript
0 search/1/w #!/usr/bin/node Node.js script text executable
!:mime application/javascript
0 search/1/w #!/bin/nodejs Node.js script text executable
!:mime application/javascript
0 search/1/w #!/usr/bin/nodejs Node.js script text executable
!:mime application/javascript
0 search/1 #!/usr/bin/env\ node Node.js script text executable
!:mime application/javascript
0 search/1 #!/usr/bin/env\ nodejs Node.js script text executable
!:mime application/javascript

31
magic/jpeg Normal file
View file

@ -0,0 +1,31 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File: jpeg,v 1.18 2012/08/01 12:12:36 christos Exp $
# JPEG images
# SunOS 5.5.1 had
#
# 0 string \377\330\377\340 JPEG file
# 0 string \377\330\377\356 JPG file
#
# both of which turn into "JPEG image data" here.
#
0 beshort 0xffd8 JPEG image data
!:mime image/jpeg
!:apple 8BIMJPEG
!:strength +2
# From: David Santinoli <david@santinoli.com>
0 string \x00\x00\x00\x0C\x6A\x50\x20\x20\x0D\x0A\x87\x0A JPEG 2000
# From: Johan van der Knijff <johan.vanderknijff@kb.nl>
# Added sub-entries for JP2, JPX, JPM and MJ2 formats; added mimetypes
# https://github.com/bitsgalore/jp2kMagic
#
# Now read value of 'Brand' field, which yields a few possibilities:
>20 string \x6a\x70\x32\x20 Part 1 (JP2)
!:mime image/jp2
>20 string \x6a\x70\x78\x20 Part 2 (JPX)
!:mime image/jpx
>20 string \x6a\x70\x6d\x20 Part 6 (JPM)
!:mime image/jpm
>20 string \x6d\x6a\x70\x32 Part 3 (MJ2)
!:mime video/mj2

11
magic/kde Normal file
View file

@ -0,0 +1,11 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File: kde,v 1.4 2009/09/19 16:28:10 christos Exp $
# kde: file(1) magic for KDE
0 string/t [KDE\ Desktop\ Entry] KDE desktop entry
!:mime application/x-kdelnk
0 string/t #\ KDE\ Config\ File KDE config file
!:mime application/x-kdelnk
0 string/t #\ xmcd xmcd database file for kscd
!:mime text/x-xmcd

30
magic/kml Normal file
View file

@ -0,0 +1,30 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File: kml,v 1.2 2009/09/19 16:28:10 christos Exp $
# Type: Google KML, formerly Keyhole Markup Language
# Future development of this format has been handed
# over to the Open Geospatial Consortium.
# http://www.opengeospatial.org/standards/kml/
# From: Asbjoern Sloth Toennesen <asbjorn@lila.io>
0 string/t \<?xml
>20 search/400 \ xmlns=
>>&0 regex ['"]http://earth.google.com/kml Google KML document
!:mime application/vnd.google-earth.kml+xml
#------------------------------------------------------------------------------
# Type: OpenGIS KML, formerly Keyhole Markup Language
# This standard is maintained by the
# Open Geospatial Consortium.
# http://www.opengeospatial.org/standards/kml/
# From: Asbjoern Sloth Toennesen <asbjorn@lila.io>
>>&0 regex ['"]http://www.opengis.net/kml OpenGIS KML document
!:mime application/vnd.google-earth.kml+xml
#------------------------------------------------------------------------------
# Type: Google KML Archive (ZIP based)
# http://code.google.com/apis/kml/documentation/kml_tut.html
# From: Asbjoern Sloth Toennesen <asbjorn@lila.io>
0 string PK\003\004
>4 byte 0x14
>>30 string doc.kml Compressed Google KML Document, including resources.
!:mime application/vnd.google-earth.kmz

22
magic/linux Normal file
View file

@ -0,0 +1,22 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File: linux,v 1.46 2013/01/06 21:26:48 christos Exp $
# linux: file(1) magic for Linux files
#
# Values for Linux/i386 binaries, from Daniel Quinlan <quinlan@yggdrasil.com>
# The following basic Linux magic is useful for reference, but using
# "long" magic is a better practice in order to avoid collisions.
#
# 2 leshort 100 Linux/i386
# >0 leshort 0407 impure executable (OMAGIC)
# >0 leshort 0410 pure executable (NMAGIC)
# >0 leshort 0413 demand-paged executable (ZMAGIC)
# >0 leshort 0314 demand-paged executable (QMAGIC)
#
# SYSLINUX boot logo files (from 'ppmtolss16' sources)
# http://www.syslinux.org/wiki/index.php/SYSLINUX#Display_graphic_from_filename:
# file extension .lss .16
0 lelong =0x1413f33d SYSLINUX' LSS16 image data
# syslinux-4.05/mime/image/x-lss16.xml
!:mime image/x-lss16

42
magic/lisp Normal file
View file

@ -0,0 +1,42 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File$
# lisp: file(1) magic for lisp programs
#
# various lisp types, from Daniel Quinlan (quinlan@yggdrasil.com)
# updated by Joerg Jenderek
# GRR: This lot is too weak
#0 string ;;
# windows INF files often begin with semicolon and use CRLF as line end
# lisp files are mainly created on unix system with LF as line end
#>2 search/4096 !\r Lisp/Scheme program text
#>2 search/4096 \r Windows INF file
0 search/4096 (setq\ Lisp/Scheme program text
!:mime text/x-lisp
0 search/4096 (defvar\ Lisp/Scheme program text
!:mime text/x-lisp
0 search/4096 (defparam\ Lisp/Scheme program text
!:mime text/x-lisp
0 search/4096 (defun\ Lisp/Scheme program text
!:mime text/x-lisp
0 search/4096 (autoload\ Lisp/Scheme program text
!:mime text/x-lisp
0 search/4096 (custom-set-variables\ Lisp/Scheme program text
!:mime text/x-lisp
# Emacs 18 - this is always correct, but not very magical.
0 string \012( Emacs v18 byte-compiled Lisp data
!:mime application/x-elc
# Emacs 19+ - ver. recognition added by Ian Springer
# Also applies to XEmacs 19+ .elc files; could tell them apart with regexs
# - Chris Chittleborough <cchittleborough@yahoo.com.au>
0 string ;ELC
>4 byte >18
>4 byte <32 Emacs/XEmacs v%d byte-compiled Lisp data
!:mime application/x-elc
# From: David Allouche <david@allouche.net>
0 search/1 \<TeXmacs| TeXmacs document text
!:mime text/texmacs

17
magic/lua Normal file
View file

@ -0,0 +1,17 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File: lua,v 1.5 2009/09/19 16:28:10 christos Exp $
# lua: file(1) magic for Lua scripting language
# URL: http://www.lua.org/
# From: Reuben Thomas <rrt@sc3d.org>, Seo Sanghyeon <tinuviel@sparcs.kaist.ac.kr>
# Lua scripts
0 search/1/w #!\ /usr/bin/lua Lua script text executable
!:mime text/x-lua
0 search/1/w #!\ /usr/local/bin/lua Lua script text executable
!:mime text/x-lua
0 search/1 #!/usr/bin/env\ lua Lua script text executable
!:mime text/x-lua
0 search/1 #!\ /usr/bin/env\ lua Lua script text executable
!:mime text/x-lua

7
magic/m4 Normal file
View file

@ -0,0 +1,7 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File$
# make: file(1) magic for M4 scripts
#
0 regex \^dnl\ M4 macro processor script text
!:mime text/x-m4

21
magic/macintosh Normal file
View file

@ -0,0 +1,21 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File: macintosh,v 1.21 2010/09/20 19:19:17 rrt Exp $
# macintosh description
#
# BinHex is the Macintosh ASCII-encoded file format (see also "apple")
# Daniel Quinlan, quinlan@yggdrasil.com
11 string must\ be\ converted\ with\ BinHex BinHex binary text
!:mime application/mac-binhex40
# Stuffit archives are the de facto standard of compression for Macintosh
# files obtained from most archives. (franklsm@tuns.ca)
0 string SIT! StuffIt Archive (data)
!:mime application/x-stuffit
!:apple SIT!SIT!
# Newer StuffIt archives (grant@netbsd.org)
0 string StuffIt StuffIt Archive
!:mime application/x-stuffit
!:apple SIT!SIT!
#>162 string >0 : %s

35
magic/mail.news Normal file
View file

@ -0,0 +1,35 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File: mail.news,v 1.21 2012/06/21 01:44:52 christos Exp $
# mail.news: file(1) magic for mail and news
#
# Unfortunately, saved netnews also has From line added in some news software.
#0 string From mail text
0 string/t Relay-Version: old news text
!:mime message/rfc822
0 string/t #!\ rnews batched news text
!:mime message/rfc822
0 string/t N#!\ rnews mailed, batched news text
!:mime message/rfc822
0 string/t Forward\ to mail forwarding text
!:mime message/rfc822
0 string/t Pipe\ to mail piping text
!:mime message/rfc822
0 string/tc delivered-to: SMTP mail text
!:mime message/rfc822
0 string/tc return-path: SMTP mail text
!:mime message/rfc822
0 string/t Path: news text
!:mime message/news
0 string/t Xref: news text
!:mime message/news
0 string/t From: news or mail text
!:mime message/rfc822
0 string/t Article saved news text
!:mime message/news
0 string/t Received: RFC 822 mail text
!:mime message/rfc822
# TNEF files...
0 lelong 0x223E9F78 Transport Neutral Encapsulation Format
!:mime application/vnd.ms-tnef

16
magic/make Normal file
View file

@ -0,0 +1,16 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File$
# make: file(1) magic for makefiles
#
0 regex \^CFLAGS makefile script text
!:mime text/x-makefile
0 regex \^LDFLAGS makefile script text
!:mime text/x-makefile
0 regex \^all: makefile script text
!:mime text/x-makefile
0 regex \^.PRECIOUS makefile script text
!:mime text/x-makefile
0 regex \^SUBDIRS automake makefile script text
!:mime text/x-makefile

29
magic/marc21 Normal file
View file

@ -0,0 +1,29 @@
# See COPYING file in this directory for original libmagic copyright.
#--------------------------------------------
# marc21: file(1) magic for MARC 21 Format
#
# Kevin Ford (kefo@loc.gov)
#
# MARC21 formats are for the representation and communication
# of bibliographic and related information in machine-readable
# form. For more info, see http://www.loc.gov/marc/
# leader position 20-21 must be 45
20 string 45
# leader starts with 5 digits, followed by codes specific to MARC format
>0 regex/1 (^[0-9]{5})[acdnp][^bhlnqsu-z] MARC21 Bibliographic
!:mime application/marc
>0 regex/1 (^[0-9]{5})[acdnosx][z] MARC21 Authority
!:mime application/marc
>0 regex/1 (^[0-9]{5})[cdn][uvxy] MARC21 Holdings
!:mime application/marc
0 regex/1 (^[0-9]{5})[acdn][w] MARC21 Classification
!:mime application/marc
>0 regex/1 (^[0-9]{5})[cdn][q] MARC21 Community
!:mime application/marc
# leader position 22-23, should be "00" but is it?
>0 regex/1 (^.{21})([^0]{2}) (non-conforming)
!:mime application/marc

17
magic/matroska Normal file
View file

@ -0,0 +1,17 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File: matroska,v 1.7 2012/08/26 10:06:15 christos Exp $
# matroska: file(1) magic for Matroska files
#
# See http://www.matroska.org/
#
# EBML id:
0 belong 0x1a45dfa3
# DocType id:
>4 search/4096 \x42\x82
# DocType contents:
>>&1 string webm WebM
!:mime video/webm
>>&1 string matroska Matroska data
!:mime video/x-matroska

9
magic/misctools Normal file
View file

@ -0,0 +1,9 @@
# See COPYING file in this directory for original libmagic copyright.
#-----------------------------------------------------------------------------
# $File: misctools,v 1.12 2010/09/29 18:36:49 rrt Exp $
# misctools: file(1) magic for miscellaneous UNIX tools.
#
0 string/c BEGIN:VCALENDAR vCalendar calendar file
!:mime text/calendar
0 string/c BEGIN:VCARD vCard visiting card
!:mime text/x-vcard

368
magic/msdos Normal file
View file

@ -0,0 +1,368 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File: msdos,v 1.84 2013/02/05 13:55:22 christos Exp $
# msdos: file(1) magic for MS-DOS files
#
# .BAT files (Daniel Quinlan, quinlan@yggdrasil.com)
# updated by Joerg Jenderek at Oct 2008,Apr 2011
0 string/t @
>1 string/cW \ echo\ off DOS batch file text
!:mime text/x-msdos-batch
>1 string/cW echo\ off DOS batch file text
!:mime text/x-msdos-batch
>1 string/cW rem DOS batch file text
!:mime text/x-msdos-batch
>1 string/cW set\ DOS batch file text
!:mime text/x-msdos-batch
# Tests for various EXE types.
#
# Many of the compressed formats were extraced from IDARC 1.23 source code.
#
0 string/b MZ DOS MZ
!:mime application/x-dosexec
# All non-DOS EXE extensions have the relocation table more than 0x40 bytes into the file.
>0x18 leshort <0x40 MS-DOS executable
# These traditional tests usually work but not always. When test quality support is
# implemented these can be turned on.
#>>0x18 leshort 0x1c (Borland compiler)
#>>0x18 leshort 0x1e (MS compiler)
# If the relocation table is 0x40 or more bytes into the file, it's definitely
# not a DOS EXE.
>0x18 leshort >0x3f
# Maybe it's a PE?
>>(0x3c.l) string PE\0\0 PE
>>>(0x3c.l+24) leshort 0x010b \b32 executable
>>>(0x3c.l+24) leshort 0x020b \b32+ executable
>>>(0x3c.l+24) leshort 0x0107 ROM image
>>>(0x3c.l+24) default x Unknown PE signature
>>>>&0 leshort x 0x%x
>>>(0x3c.l+22) leshort&0x2000 >0 (DLL)
>>>(0x3c.l+92) leshort 1 (native)
>>>(0x3c.l+92) leshort 2 (GUI)
>>>(0x3c.l+92) leshort 3 (console)
>>>(0x3c.l+92) leshort 7 (POSIX)
>>>(0x3c.l+92) leshort 9 (Windows CE)
>>>(0x3c.l+92) leshort 10 (EFI application)
>>>(0x3c.l+92) leshort 11 (EFI boot service driver)
>>>(0x3c.l+92) leshort 12 (EFI runtime driver)
>>>(0x3c.l+92) leshort 13 (EFI ROM)
>>>(0x3c.l+92) leshort 14 (XBOX)
>>>(0x3c.l+92) leshort 15 (Windows boot application)
>>>(0x3c.l+92) default x (Unknown subsystem
>>>>&0 leshort x 0x%x)
>>>(0x3c.l+4) leshort 0x14c Intel 80386
>>>(0x3c.l+4) leshort 0x166 MIPS R4000
>>>(0x3c.l+4) leshort 0x168 MIPS R10000
>>>(0x3c.l+4) leshort 0x184 Alpha
>>>(0x3c.l+4) leshort 0x1a2 Hitachi SH3
>>>(0x3c.l+4) leshort 0x1a6 Hitachi SH4
>>>(0x3c.l+4) leshort 0x1c0 ARM
>>>(0x3c.l+4) leshort 0x1c2 ARM Thumb
>>>(0x3c.l+4) leshort 0x1c4 ARMv7 Thumb
>>>(0x3c.l+4) leshort 0x1f0 PowerPC
>>>(0x3c.l+4) leshort 0x200 Intel Itanium
>>>(0x3c.l+4) leshort 0x266 MIPS16
>>>(0x3c.l+4) leshort 0x268 Motorola 68000
>>>(0x3c.l+4) leshort 0x290 PA-RISC
>>>(0x3c.l+4) leshort 0x366 MIPSIV
>>>(0x3c.l+4) leshort 0x466 MIPS16 with FPU
>>>(0x3c.l+4) leshort 0xebc EFI byte code
>>>(0x3c.l+4) leshort 0x8664 x86-64
>>>(0x3c.l+4) leshort 0xc0ee MSIL
>>>(0x3c.l+4) default x Unknown processor type
>>>>&0 leshort x 0x%x
>>>(0x3c.l+22) leshort&0x0200 >0 (stripped to external PDB)
>>>(0x3c.l+22) leshort&0x1000 >0 system file
>>>(0x3c.l+24) leshort 0x010b
>>>>(0x3c.l+232) lelong >0 Mono/.Net assembly
>>>(0x3c.l+24) leshort 0x020b
>>>>(0x3c.l+248) lelong >0 Mono/.Net assembly
# hooray, there's a DOS extender using the PE format, with a valid PE
# executable inside (which just prints a message and exits if run in win)
>>>(8.s*16) string 32STUB \b, 32rtm DOS extender
>>>(8.s*16) string !32STUB \b, for MS Windows
>>>(0x3c.l+0xf8) string UPX0 \b, UPX compressed
>>>(0x3c.l+0xf8) search/0x140 PEC2 \b, PECompact2 compressed
>>>(0x3c.l+0xf8) search/0x140 UPX2
>>>>(&0x10.l+(-4)) string PK\3\4 \b, ZIP self-extracting archive (Info-Zip)
>>>(0x3c.l+0xf8) search/0x140 .idata
>>>>(&0xe.l+(-4)) string PK\3\4 \b, ZIP self-extracting archive (Info-Zip)
>>>>(&0xe.l+(-4)) string ZZ0 \b, ZZip self-extracting archive
>>>>(&0xe.l+(-4)) string ZZ1 \b, ZZip self-extracting archive
>>>(0x3c.l+0xf8) search/0x140 .rsrc
>>>>(&0x0f.l+(-4)) string a\\\4\5 \b, WinHKI self-extracting archive
>>>>(&0x0f.l+(-4)) string Rar! \b, RAR self-extracting archive
>>>>(&0x0f.l+(-4)) search/0x3000 MSCF \b, InstallShield self-extracting archive
>>>>(&0x0f.l+(-4)) search/32 Nullsoft \b, Nullsoft Installer self-extracting archive
>>>(0x3c.l+0xf8) search/0x140 .data
>>>>(&0x0f.l) string WEXTRACT \b, MS CAB-Installer self-extracting archive
>>>(0x3c.l+0xf8) search/0x140 .petite\0 \b, Petite compressed
>>>>(0x3c.l+0xf7) byte x
>>>>>(&0x104.l+(-4)) string =!sfx! \b, ACE self-extracting archive
>>>(0x3c.l+0xf8) search/0x140 .WISE \b, WISE installer self-extracting archive
>>>(0x3c.l+0xf8) search/0x140 .dz\0\0\0 \b, Dzip self-extracting archive
>>>&(0x3c.l+0xf8) search/0x100 _winzip_ \b, ZIP self-extracting archive (WinZip)
>>>&(0x3c.l+0xf8) search/0x100 SharedD \b, Microsoft Installer self-extracting archive
>>>0x30 string Inno \b, InnoSetup self-extracting archive
# Hmm, not a PE but the relocation table is too high for a traditional DOS exe,
# must be one of the unusual subformats.
>>(0x3c.l) string !PE\0\0 MS-DOS executable
>>(0x3c.l) string NE \b, NE
>>>(0x3c.l+0x36) byte 1 for OS/2 1.x
>>>(0x3c.l+0x36) byte 2 for MS Windows 3.x
>>>(0x3c.l+0x36) byte 3 for MS-DOS
>>>(0x3c.l+0x36) byte 4 for Windows 386
>>>(0x3c.l+0x36) byte 5 for Borland Operating System Services
>>>(0x3c.l+0x36) default x
>>>>(0x3c.l+0x36) byte x (unknown OS %x)
>>>(0x3c.l+0x36) byte 0x81 for MS-DOS, Phar Lap DOS extender
>>>(0x3c.l+0x0c) leshort&0x8003 0x8002 (DLL)
>>>(0x3c.l+0x0c) leshort&0x8003 0x8001 (driver)
>>>&(&0x24.s-1) string ARJSFX \b, ARJ self-extracting archive
>>>(0x3c.l+0x70) search/0x80 WinZip(R)\ Self-Extractor \b, ZIP self-extracting archive (WinZip)
>>(0x3c.l) string LX\0\0 \b, LX
>>>(0x3c.l+0x0a) leshort <1 (unknown OS)
>>>(0x3c.l+0x0a) leshort 1 for OS/2
>>>(0x3c.l+0x0a) leshort 2 for MS Windows
>>>(0x3c.l+0x0a) leshort 3 for DOS
>>>(0x3c.l+0x0a) leshort >3 (unknown OS)
>>>(0x3c.l+0x10) lelong&0x28000 =0x8000 (DLL)
>>>(0x3c.l+0x10) lelong&0x20000 >0 (device driver)
>>>(0x3c.l+0x10) lelong&0x300 0x300 (GUI)
>>>(0x3c.l+0x10) lelong&0x28300 <0x300 (console)
>>>(0x3c.l+0x08) leshort 1 i80286
>>>(0x3c.l+0x08) leshort 2 i80386
>>>(0x3c.l+0x08) leshort 3 i80486
>>>(8.s*16) string emx \b, emx
>>>>&1 string x %s
>>>&(&0x54.l-3) string arjsfx \b, ARJ self-extracting archive
# MS Windows system file, supposedly a collection of LE executables
>>(0x3c.l) string W3 \b, W3 for MS Windows
>>(0x3c.l) string LE\0\0 \b, LE executable
>>>(0x3c.l+0x0a) leshort 1
# some DOS extenders use LE files with OS/2 header
>>>>0x240 search/0x100 DOS/4G for MS-DOS, DOS4GW DOS extender
>>>>0x240 search/0x200 WATCOM\ C/C++ for MS-DOS, DOS4GW DOS extender
>>>>0x440 search/0x100 CauseWay\ DOS\ Extender for MS-DOS, CauseWay DOS extender
>>>>0x40 search/0x40 PMODE/W for MS-DOS, PMODE/W DOS extender
>>>>0x40 search/0x40 STUB/32A for MS-DOS, DOS/32A DOS extender (stub)
>>>>0x40 search/0x80 STUB/32C for MS-DOS, DOS/32A DOS extender (configurable stub)
>>>>0x40 search/0x80 DOS/32A for MS-DOS, DOS/32A DOS extender (embedded)
# this is a wild guess; hopefully it is a specific signature
>>>>&0x24 lelong <0x50
>>>>>(&0x4c.l) string \xfc\xb8WATCOM
>>>>>>&0 search/8 3\xdbf\xb9 \b, 32Lite compressed
# another wild guess: if real OS/2 LE executables exist, they probably have higher start EIP
#>>>>(0x3c.l+0x1c) lelong >0x10000 for OS/2
# fails with DOS-Extenders.
>>>(0x3c.l+0x0a) leshort 2 for MS Windows
>>>(0x3c.l+0x0a) leshort 3 for DOS
>>>(0x3c.l+0x0a) leshort 4 for MS Windows (VxD)
>>>(&0x7c.l+0x26) string UPX \b, UPX compressed
>>>&(&0x54.l-3) string UNACE \b, ACE self-extracting archive
# looks like ASCII, probably some embedded copyright message.
# and definitely not NE/LE/LX/PE
>>0x3c lelong >0x20000000
>>>(4.s*512) leshort !0x014c \b, MZ for MS-DOS
# header data too small for extended executable
>2 long !0
>>0x18 leshort <0x40
>>>(4.s*512) leshort !0x014c
>>>>&(2.s-514) string !LE
>>>>>&-2 string !BW \b, MZ for MS-DOS
>>>>&(2.s-514) string LE \b, LE
>>>>>0x240 search/0x100 DOS/4G for MS-DOS, DOS4GW DOS extender
# educated guess since indirection is still not capable enough for complex offset
# calculations (next embedded executable would be at &(&2*512+&0-2)
# I suspect there are only LE executables in these multi-exe files
>>>>&(2.s-514) string BW
>>>>>0x240 search/0x100 DOS/4G ,\b LE for MS-DOS, DOS4GW DOS extender (embedded)
>>>>>0x240 search/0x100 !DOS/4G ,\b BW collection for MS-DOS
# This sequence skips to the first COFF segment, usually .text
>(4.s*512) leshort 0x014c \b, COFF
>>(8.s*16) string go32stub for MS-DOS, DJGPP go32 DOS extender
>>(8.s*16) string emx
>>>&1 string x for DOS, Win or OS/2, emx %s
>>&(&0x42.l-3) byte x
>>>&0x26 string UPX \b, UPX compressed
# and yet another guess: small .text, and after large .data is unusal, could be 32lite
>>&0x2c search/0xa0 .text
>>>&0x0b lelong <0x2000
>>>>&0 lelong >0x6000 \b, 32lite compressed
>(8.s*16) string $WdX \b, WDos/X DOS extender
# By now an executable type should have been printed out. The executable
# may be a self-uncompressing archive, so look for evidence of that and
# print it out.
#
# Some signatures below from Greg Roelofs, newt@uchicago.edu.
#
>0x35 string \x8e\xc0\xb9\x08\x00\xf3\xa5\x4a\x75\xeb\x8e\xc3\x8e\xd8\x33\xff\xbe\x30\x00\x05 \b, aPack compressed
>0xe7 string LH/2\ Self-Extract \b, %s
>0x1c string UC2X \b, UCEXE compressed
>0x1c string WWP\ \b, WWPACK compressed
>0x1c string RJSX \b, ARJ self-extracting archive
>0x1c string diet \b, diet compressed
>0x1c string LZ09 \b, LZEXE v0.90 compressed
>0x1c string LZ91 \b, LZEXE v0.91 compressed
>0x1c string tz \b, TinyProg compressed
>0x1e string Copyright\ 1989-1990\ PKWARE\ Inc. Self-extracting PKZIP archive
!:mime application/zip
# Yes, this really is "Copr", not "Corp."
>0x1e string PKLITE\ Copr. Self-extracting PKZIP archive
!:mime application/zip
# winarj stores a message in the stub instead of the sig in the MZ header
>0x20 search/0xe0 aRJsfX \b, ARJ self-extracting archive
>0x20 string AIN
>>0x23 string 2 \b, AIN 2.x compressed
>>0x23 string <2 \b, AIN 1.x compressed
>>0x23 string >2 \b, AIN 1.x compressed
>0x24 string LHa's\ SFX \b, LHa self-extracting archive
!:mime application/x-lha
>0x24 string LHA's\ SFX \b, LHa self-extracting archive
!:mime application/x-lha
>0x24 string \ $ARX \b, ARX self-extracting archive
>0x24 string \ $LHarc \b, LHarc self-extracting archive
>0x20 string SFX\ by\ LARC \b, LARC self-extracting archive
>0x40 string aPKG \b, aPackage self-extracting archive
>0x64 string W\ Collis\0\0 \b, Compack compressed
>0x7a string Windows\ self-extracting\ ZIP \b, ZIP self-extracting archive
>>&0xf4 search/0x140 \x0\x40\x1\x0
>>>(&0.l+(4)) string MSCF \b, WinHKI CAB self-extracting archive
>1638 string -lh5- \b, LHa self-extracting archive v2.13S
>0x17888 string Rar! \b, RAR self-extracting archive
# Skip to the end of the EXE. This will usually work fine in the PE case
# because the MZ image is hardcoded into the toolchain and almost certainly
# won't match any of these signatures.
>(4.s*512) long x
>>&(2.s-517) byte x
>>>&0 string PK\3\4 \b, ZIP self-extracting archive
>>>&0 string Rar! \b, RAR self-extracting archive
>>>&0 string =!\x11 \b, AIN 2.x self-extracting archive
>>>&0 string =!\x12 \b, AIN 2.x self-extracting archive
>>>&0 string =!\x17 \b, AIN 1.x self-extracting archive
>>>&0 string =!\x18 \b, AIN 1.x self-extracting archive
>>>&7 search/400 **ACE** \b, ACE self-extracting archive
>>>&0 search/0x480 UC2SFX\ Header \b, UC2 self-extracting archive
# a few unknown ZIP sfxes, no idea if they are needed or if they are
# already captured by the generic patterns above
>(8.s*16) search/0x20 PKSFX \b, ZIP self-extracting archive (PKZIP)
# TODO: how to add this? >FileSize-34 string Windows\ Self-Installing\ Executable \b, ZIP self-extracting archive
#
# TELVOX Teleinformatica CODEC self-extractor for OS/2:
>49801 string \x79\xff\x80\xff\x76\xff \b, CODEC archive v3.21
>>49824 leshort =1 \b, 1 file
>>49824 leshort >1 \b, %u files
# Popular applications
2080 string Microsoft\ Word\ 6.0\ Document %s
!:mime application/msword
2080 string Documento\ Microsoft\ Word\ 6 Spanish Microsoft Word 6 document data
!:mime application/msword
# Pawel Wiecek <coven@i17linuxb.ists.pwr.wroc.pl> (for polish Word)
2112 string MSWordDoc Microsoft Word document data
!:mime application/msword
#
0 belong 0x31be0000 Microsoft Word Document
!:mime application/msword
#
0 string/b PO^Q` Microsoft Word 6.0 Document
!:mime application/msword
#
0 string/b \376\067\0\043 Microsoft Office Document
!:mime application/msword
0 string/b \333\245-\0\0\0 Microsoft Office Document
!:mime application/msword
512 string/b \354\245\301 Microsoft Word Document
!:mime application/msword
#
0 string/b \xDB\xA5\x2D\x00 Microsoft WinWord 2.0 Document
!:mime application/msword
#
2080 string Microsoft\ Excel\ 5.0\ Worksheet %s
!:mime application/vnd.ms-excel
#
0 string/b \xDB\xA5\x2D\x00 Microsoft WinWord 2.0 Document
!:mime application/msword
2080 string Foglio\ di\ lavoro\ Microsoft\ Exce %s
!:mime application/vnd.ms-excel
#
# Pawel Wiecek <coven@i17linuxb.ists.pwr.wroc.pl> (for polish Excel)
2114 string Biff5 Microsoft Excel 5.0 Worksheet
!:mime application/vnd.ms-excel
# Italian MS-Excel
2121 string Biff5 Microsoft Excel 5.0 Worksheet
!:mime application/vnd.ms-excel
0 string/b \x09\x04\x06\x00\x00\x00\x10\x00 Microsoft Excel Worksheet
!:mime application/vnd.ms-excel
#
0 belong 0x00001a00 Lotus 1-2-3
!:mime application/x-123
#
0 belong 0x00000200 Lotus 1-2-3
!:mime application/x-123
0 string/b WordPro\0 Lotus WordPro
!:mime application/vnd.lotus-wordpro
0 string/b WordPro\r\373 Lotus WordPro
!:mime application/vnd.lotus-wordpro
# Windows icons (Ian Springer <ips@fpk.hp.com>)
0 string/b \000\000\001\000 MS Windows icon resource
!:mime image/x-icon
# .PIF files added by Joerg Jenderek from http://smsoft.ru/en/pifdoc.htm
# only for windows versions equal or greater 3.0
0x171 string MICROSOFT\ PIFEX\0 Windows Program Information File
!:mime application/x-dosexec
# TNEF magic From "Joomy" <joomy@se-ed.net>
# Microsoft Outlook's Transport Neutral Encapsulation Format (TNEF)
0 leshort 0x223e9f78 TNEF
!:mime application/vnd.ms-tnef
#------------------------------------------------------------------------------
# From Stuart Caie <kyzer@4u.net> (developer of cabextract)
# Microsoft Cabinet files
0 string/b MSCF\0\0\0\0 Microsoft Cabinet archive data
!:mime application/vnd.ms-cab-compressed
# from http://filext.com by Derek M Jones <derek@knosof.co.uk>
# False positive with PPT (also currently this string is too long)
#0 string/b \xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x3E\x00\x03\x00\xFE\xFF\x09\x00\x06 Microsoft Installer
0 string/b \320\317\021\340\241\261\032\341 Microsoft Office Document
#>48 byte 0x1B Excel Document
#!:mime application/vnd.ms-excel
>546 string bjbj Microsoft Word Document
!:mime application/msword
>546 string jbjb Microsoft Word Document
!:mime application/msword
0 string/b \224\246\056 Microsoft Word Document
!:mime application/msword
512 string R\0o\0o\0t\0\ \0E\0n\0t\0r\0y Microsoft Word Document
!:mime application/msword
# MS eBook format (.lit)
0 string/b ITOLITLS Microsoft Reader eBook Data
>8 lelong x \b, version %u
!:mime application/x-ms-reader

12
magic/neko Normal file
View file

@ -0,0 +1,12 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------
# $File: java,v 1.12 2009/09/19 16:28:10 christos Exp $
# From: Mikhail Gusarov <dottedmag@dottedmag.net>
# NekoVM (http://nekovm.org/) bytecode
0 string NEKO NekoVM bytecode
>4 lelong x (%d global symbols,
>8 lelong x %d global fields,
>12 lelong x %d bytecode ops)
!:mime application/x-nekovm-bytecode

11
magic/pascal Normal file
View file

@ -0,0 +1,11 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File$
# pascal: file(1) magic for Pascal source
#
0 search/8192 (input, Pascal source text
!:mime text/x-pascal
0 regex \^program Pascal source text
!:mime text/x-pascal
0 regex \^record Pascal source text
!:mime text/x-pascal

8
magic/pdf Normal file
View file

@ -0,0 +1,8 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File$
# pdf: file(1) magic for Portable Document Format
#
0 string %PDF- PDF document
!:mime application/pdf

26
magic/perl Normal file
View file

@ -0,0 +1,26 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File: perl,v 1.19 2012/06/20 21:16:25 christos Exp $
# perl: file(1) magic for Larry Wall's perl language.
#
# The `eval' lines recognizes an outrageously clever hack.
# Keith Waclena <keith@cerberus.uchicago.edu>
# Send additions to <perl5-porters@perl.org>
0 search/1/w #!\ /bin/perl Perl script text executable
!:mime text/x-perl
0 search/1 eval\ "exec\ /bin/perl Perl script text
!:mime text/x-perl
0 search/1/w #!\ /usr/bin/perl Perl script text executable
!:mime text/x-perl
0 search/1 eval\ "exec\ /usr/bin/perl Perl script text
!:mime text/x-perl
0 search/1/w #!\ /usr/local/bin/perl Perl script text executable
!:mime text/x-perl
0 search/1 eval\ "exec\ /usr/local/bin/perl Perl script text
!:mime text/x-perl
0 search/1 eval\ '(exit\ $?0)'\ &&\ eval\ 'exec Perl script text
!:mime text/x-perl
0 search/1 #!/usr/bin/env\ perl Perl script text executable
!:mime text/x-perl
0 search/1 #!\ /usr/bin/env\ perl Perl script text executable
!:mime text/x-perl

27
magic/pgp Normal file
View file

@ -0,0 +1,27 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File$
# pgp: file(1) magic for Pretty Good Privacy
# see http://lists.gnupg.org/pipermail/gnupg-devel/1999-September/016052.html
#
0 beshort 0x9900 PGP key public ring
!:mime application/x-pgp-keyring
0 beshort 0x9501 PGP key security ring
!:mime application/x-pgp-keyring
0 beshort 0x9500 PGP key security ring
!:mime application/x-pgp-keyring
0 beshort 0xa600 PGP encrypted data
#!:mime application/pgp-encrypted
#0 string -----BEGIN\040PGP text/PGP armored data
!:mime text/PGP # encoding: armored data
#>15 string PUBLIC\040KEY\040BLOCK- public key block
#>15 string MESSAGE- message
#>15 string SIGNED\040MESSAGE- signed message
#>15 string PGP\040SIGNATURE- signature
2 string ---BEGIN\ PGP\ PUBLIC\ KEY\ BLOCK- PGP public key block
!:mime application/pgp-keys
0 string -----BEGIN\040PGP\40MESSAGE- PGP message
!:mime application/pgp
0 string -----BEGIN\040PGP\40SIGNATURE- PGP signature
!:mime application/pgp-signature

7
magic/pkgadd Normal file
View file

@ -0,0 +1,7 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File$
# pkgadd: file(1) magic for SysV R4 PKG Datastreams
#
0 string #\ PaCkAgE\ DaTaStReAm pkg Datastream (SVR4)
!:mime application/x-svr4-package

14
magic/printer Normal file
View file

@ -0,0 +1,14 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File: printer,v 1.24 2011/05/08 16:34:51 christos Exp $
# printer: file(1) magic for printer-formatted files
#
# PostScript, updated by Daniel Quinlan (quinlan@yggdrasil.com)
0 string %! PostScript document text
!:mime application/postscript
!:apple ASPSTEXT
# Some PCs have the annoying habit of adding a ^D as a document separator
0 string \004%! PostScript document text
!:mime application/postscript
!:apple ASPSTEXT

46
magic/python Normal file
View file

@ -0,0 +1,46 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File: python,v 1.21 2012/06/21 01:12:51 christos Exp $
# python: file(1) magic for python
#
0 search/1/w #!\ /usr/bin/python Python script text executable
!:mime text/x-python
0 search/1/w #!\ /usr/local/bin/python Python script text executable
!:mime text/x-python
0 search/1 #!/usr/bin/env\ python Python script text executable
!:mime text/x-python
0 search/1 #!\ /usr/bin/env\ python Python script text executable
!:mime text/x-python
# from module.submodule import func1, func2
0 regex \^from\\s+(\\w|\\.)+\\s+import.*$ Python script text executable
!:mime text/x-python
# def __init__ (self, ...):
0 search/4096 def\ __init__
>&0 search/64 self Python script text executable
!:mime text/x-python
# comments
0 search/4096 '''
>&0 regex .*'''$ Python script text executable
!:mime text/x-python
0 search/4096 """
>&0 regex .*"""$ Python script text executable
!:mime text/x-python
# try:
# except: or finally:
# block
0 search/4096 try:
>&0 regex \^\\s*except.*: Python script text executable
!:mime text/x-python
>&0 search/4096 finally: Python script text executable
!:mime text/x-python
# def name(args, args):
0 regex \^(\ |\\t)*def\ +[a-zA-Z]+
>&0 regex \ *\\(([a-zA-Z]|,|\ )*\\):$ Python script text executable
!:mime text/x-python

36
magic/riff Normal file
View file

@ -0,0 +1,36 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File: riff,v 1.22 2011/09/06 11:00:06 christos Exp $
# riff: file(1) magic for RIFF format
# See
#
# http://www.seanet.com/users/matts/riffmci/riffmci.htm
#
# AVI section extended by Patrik Radman <patrik+file-magic@iki.fi>
#
0 string RIFF RIFF (little-endian) data
# Microsoft WAVE format (*.wav)
>8 string WAVE \b, WAVE audio
!:mime audio/x-wav
# Corel Draw Picture
>8 string CDRA \b, Corel Draw Picture
!:mime image/x-coreldraw
# AVI == Audio Video Interleave
>8 string AVI\040 \b, AVI
!:mime video/x-msvideo
#------------------------------------------------------------------------------
# Sony Wave64
# see http://www.vcs.de/fileadmin/user_upload/MBS/PDF/Whitepaper/Informations_about_Sony_Wave64.pdf
# 128 bit RIFF-GUID { 66666972-912E-11CF-A5D6-28DB04C10000 } in little-endian
0 string riff\x2E\x91\xCF\x11\xA5\xD6\x28\xDB\x04\xC1\x00\x00 Sony Wave64 RIFF data
# 128 bit + total file size (64 bits) so 24 bytes
# then WAVE-GUID { 65766177-ACF3-11D3-8CD1-00C04F8EDB8A }
>24 string wave\xF3\xAC\xD3\x11\x8C\xD1\x00\xC0\x4F\x8E\xDB\x8A \b, WAVE 64 audio
!:mime audio/x-w64
#------------------------------------------------------------------------------
# MBWF/RF64
# see EBU TECH 3306 http://tech.ebu.ch/docs/tech/tech3306-2009.pdf
0 string RF64\xff\xff\xff\xffWAVEds64 MBWF/RF64 audio
!:mime audio/x-wav

12
magic/rpm Normal file
View file

@ -0,0 +1,12 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File: rpm,v 1.11 2011/06/14 12:47:41 christos Exp $
#
# RPM: file(1) magic for Red Hat Packages Erik Troan (ewt@redhat.com)
#
0 belong 0xedabeedb RPM
!:mime application/x-rpm
#delta RPM Daniel Novotny (dnovotny@redhat.com)
0 string drpm Delta RPM
!:mime application/x-rpm

9
magic/rtf Normal file
View file

@ -0,0 +1,9 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File$
# rtf: file(1) magic for Rich Text Format (RTF)
#
# Duncan P. Simpson, D.P.Simpson@dcs.warwick.ac.uk
#
0 string {\\rtf Rich Text Format data,
!:mime text/rtf

28
magic/ruby Normal file
View file

@ -0,0 +1,28 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File: ruby,v 1.4 2010/07/08 20:24:13 christos Exp $
# ruby: file(1) magic for Ruby scripting language
# URL: http://www.ruby-lang.org/
# From: Reuben Thomas <rrt@sc3d.org>
# Ruby scripts
0 search/1/w #!\ /usr/bin/ruby Ruby script text executable
!:mime text/x-ruby
0 search/1/w #!\ /usr/local/bin/ruby Ruby script text executable
!:mime text/x-ruby
0 search/1 #!/usr/bin/env\ ruby Ruby script text executable
!:mime text/x-ruby
0 search/1 #!\ /usr/bin/env\ ruby Ruby script text executable
!:mime text/x-ruby
# What looks like ruby, but does not have a shebang
# (modules and such)
# From: Lubomir Rintel <lkundrak@v3.sk>
0 regex \^[\ \t]*require[\ \t]'[A-Za-z_/]+'
>0 regex include\ [A-Z]|def\ [a-z]|\ do$
>>0 regex \^[\ \t]*end([\ \t]*[;#].*)?$ Ruby script text
!:mime text/x-ruby
0 regex \^[\ \t]*(class|module)[\ \t][A-Z]
>0 regex (modul|includ)e\ [A-Z]|def\ [a-z]
>>0 regex \^[\ \t]*end([\ \t]*[;#].*)?$ Ruby module source text
!:mime text/x-ruby

7
magic/sc Normal file
View file

@ -0,0 +1,7 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File$
# sc: file(1) magic for "sc" spreadsheet
#
38 string Spreadsheet sc spreadsheet file
!:mime application/x-sc

82
magic/sgml Normal file
View file

@ -0,0 +1,82 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File: sgml,v 1.28 2012/04/28 21:20:26 christos Exp $
# Type: SVG Vectorial Graphics
# From: Noel Torres <tecnico@ejerciciosresueltos.com>
0 string \<?xml\ version="
>15 string >\0
>>19 search/4096 \<svg SVG Scalable Vector Graphics image
!:mime image/svg+xml
>>19 search/4096 \<gnc-v2 GnuCash file
!:mime application/x-gnucash
# Sitemap file
0 string/t \<?xml\ version="
>15 string >\0
>>19 search/4096 \<urlset XML Sitemap document text
!:mime application/xml-sitemap
# xhtml
0 string/t \<?xml\ version="
>15 string >\0
>>19 search/4096/cWbt \<!doctype\ html XHTML document text
!:mime text/html
0 string/t \<?xml\ version='
>15 string >\0
>>19 search/4096/cWbt \<!doctype\ html XHTML document text
!:mime text/html
0 string/t \<?xml\ version="
>15 string >\0
>>19 search/4096/cWbt \<html broken XHTML document text
!:mime text/html
#------------------------------------------------------------------------------
# sgml: file(1) magic for Standard Generalized Markup Language
# HyperText Markup Language (HTML) is an SGML document type,
# from Daniel Quinlan (quinlan@yggdrasil.com)
# adapted to string extenstions by Anthon van der Neut <anthon@mnt.org)
0 search/4096/cWt \<!doctype\ html HTML document text
!:mime text/html
!:strength + 5
0 search/4096/cwt \<head HTML document text
!:mime text/html
!:strength + 5
0 search/4096/cwt \<title HTML document text
!:mime text/html
!:strength + 5
0 search/4096/cwt \<html HTML document text
!:mime text/html
!:strength + 5
0 search/4096/cwt \<script HTML document text
!:mime text/html
!:strength + 5
0 search/4096/cwt \<style HTML document text
!:mime text/html
!:strength + 5
0 search/4096/cwt \<table HTML document text
!:mime text/html
!:strength + 5
0 search/4096/cwt \<a\ href= HTML document text
!:mime text/html
!:strength + 5
# Extensible markup language (XML), a subset of SGML
# from Marc Prud'hommeaux (marc@apocalypse.org)
0 search/1/cwt \<?xml XML document text
!:mime application/xml
!:strength + 5
0 string/t \<?xml\ version\ " XML
!:mime application/xml
!:strength + 5
0 string/t \<?xml\ version=" XML
!:mime application/xml
!:strength + 5
0 string \<?xml\ version=' XML
!:mime application/xml
!:strength + 5
0 search/1/wbt \<?xml XML document text
!:mime application/xml
!:strength - 10
0 search/1/wt \<?XML broken XML document text
!:mime application/xml
!:strength - 10

17
magic/sniffer Normal file
View file

@ -0,0 +1,17 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# sniffer: file(1) magic for packet capture files
#
# From: guy@alum.mit.edu (Guy Harris)
#
#
# "libpcap" capture files.
# (We call them "tcpdump capture file(s)" for now, as "tcpdump" is
# the main program that uses that format, but there are other programs
# that use "libpcap", or that use the same capture file format.)
#
0 ubelong 0xa1b2c3d4 tcpdump capture file (big-endian)
!:mime application/vnd.tcpdump.pcap
0 ulelong 0xa1b2c3d4 tcpdump capture file (little-endian)
!:mime application/vnd.tcpdump.pcap

23
magic/tcl Normal file
View file

@ -0,0 +1,23 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# file: file(1) magic for Tcl scripting language
# URL: http://www.tcl.tk/
# From: gustaf neumann
# Tcl scripts
0 search/1/w #!\ /usr/bin/tcl Tcl script text executable
!:mime text/x-lua
0 search/1/w #!\ /usr/local/bin/tcl Tcl script text executable
!:mime text/x-tcl
0 search/1 #!/usr/bin/env\ tcl Tcl script text executable
!:mime text/x-tcl
0 search/1 #!\ /usr/bin/env\ tcl Tcl script text executable
!:mime text/x-tcl
0 search/1/w #!\ /usr/bin/wish Tcl/Tk script text executable
!:mime text/x-tcl
0 search/1/w #!\ /usr/local/bin/wish Tcl/Tk script text executable
!:mime text/x-tcl
0 search/1 #!/usr/bin/env\ wish Tcl/Tk script text executable
!:mime text/x-tcl
0 search/1 #!\ /usr/bin/env\ wish Tcl/Tk script text executable
!:mime text/x-tcl

56
magic/tex Normal file
View file

@ -0,0 +1,56 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File: tex,v 1.17 2010/09/20 19:19:17 rrt Exp $
# tex: file(1) magic for TeX files
#
# XXX - needs byte-endian stuff (big-endian and little-endian DVI?)
#
# From <conklin@talisman.kaleida.com>
# Although we may know the offset of certain text fields in TeX DVI
# and font files, we can't use them reliably because they are not
# zero terminated. [but we do anyway, christos]
0 string \367\002 TeX DVI file
!:mime application/x-dvi
# There is no way to detect TeX Font Metric (*.tfm) files without
# breaking them apart and reading the data. The following patterns
# match most *.tfm files generated by METAFONT or afm2tfm.
2 string \000\021 TeX font metric data
!:mime application/x-tex-tfm
2 string \000\022 TeX font metric data
!:mime application/x-tex-tfm
# Texinfo and GNU Info, from Daniel Quinlan (quinlan@yggdrasil.com)
0 search/1 \\input\ texinfo Texinfo source text
!:mime text/x-texinfo
0 search/1 This\ is\ Info\ file GNU Info text
!:mime text/x-info
# TeX documents, from Daniel Quinlan (quinlan@yggdrasil.com)
0 search/4096 \\input TeX document text
!:mime text/x-tex
!:strength + 15
0 search/4096 \\section LaTeX document text
!:mime text/x-tex
!:strength + 18
0 search/4096 \\setlength LaTeX document text
!:mime text/x-tex
!:strength + 15
0 search/4096 \\documentstyle LaTeX document text
!:mime text/x-tex
!:strength + 18
0 search/4096 \\chapter LaTeX document text
!:mime text/x-tex
!:strength + 18
0 search/4096 \\documentclass LaTeX 2e document text
!:mime text/x-tex
!:strength + 15
0 search/4096 \\relax LaTeX auxiliary file
!:mime text/x-tex
!:strength + 15
0 search/4096 \\contentsline LaTeX table of contents
!:mime text/x-tex
!:strength + 15
0 search/4096 %\ -*-latex-*- LaTeX document text
!:mime text/x-tex

22
magic/troff Normal file
View file

@ -0,0 +1,22 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File$
# troff: file(1) magic for *roff
#
# updated by Daniel Quinlan (quinlan@yggdrasil.com)
# troff input
0 search/1 .\\" troff or preprocessor input text
!:mime text/troff
0 search/1 '\\" troff or preprocessor input text
!:mime text/troff
0 search/1 '.\\" troff or preprocessor input text
!:mime text/troff
0 search/1 \\" troff or preprocessor input text
!:mime text/troff
0 search/1 ''' troff or preprocessor input text
!:mime text/troff
0 regex/20 \^\\.[A-Za-z0-9][A-Za-z0-9][\ \t] troff or preprocessor input text
!:mime text/troff
0 regex/20 \^\\.[A-Za-z0-9][A-Za-z0-9]$ troff or preprocessor input text
!:mime text/troff

26
magic/vorbis Normal file
View file

@ -0,0 +1,26 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File$
# vorbis: file(1) magic for Ogg/Vorbis files
#
# From Felix von Leitner <leitner@fefe.de>
# Extended by Beni Cherniavsky <cben@crosswinds.net>
# Further extended by Greg Wooledge <greg@wooledge.org>
#
# Most (everything but the number of channels and bitrate) is commented
# out with `##' as it's not interesting to the average user. The most
# probable things advanced users would want to uncomment are probably
# the number of comments and the encoder version.
#
# FIXME: The first match has been made a search, so that it can skip
# over prepended ID3 tags. This will work for MIME type detection, but
# won't work for detecting other properties of the file (they all need
# to be made relative to the search). In any case, if the file has ID3
# tags, the ID3 information will be printed, not the Ogg information,
# so until that's fixed, this doesn't matter.
# FIXME[2]: Disable the above for now, since search assumes text mode.
#
# --- Ogg Framing ---
#0 search/1000 OggS Ogg data
0 string OggS Ogg data
!:mime application/ogg

14
magic/warc Normal file
View file

@ -0,0 +1,14 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File: warc,v 1.2 2009/09/19 16:28:13 christos Exp $
# warc: file(1) magic for WARC files
0 string WARC/ WARC Archive
>5 string x version %.4s
!:mime application/warc
#------------------------------------------------------------------------------
# Arc File Format from Internet Archive
# see http://www.archive.org/web/researcher/ArcFileFormat.php
0 string filedesc:// Internet Archive File
!:mime application/x-ia-arc

19
magic/windows Normal file
View file

@ -0,0 +1,19 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File: windows,v 1.4 2009/09/19 16:28:13 christos Exp $
# windows: file(1) magic for Microsoft Windows
#
# This file is mainly reserved for files where programs
# using them are run almost always on MS Windows 3.x or
# above, or files only used exclusively in Windows OS,
# where there is no better category to allocate for.
# For example, even though WinZIP almost run on Windows
# only, it is better to treat them as "archive" instead.
# For format usable in DOS, such as generic executable
# format, please specify under "msdos" file.
#
# From: Pal Tamas <folti@balabit.hu>
# Autorun File
0 string/c [autorun]\r\n Microsoft Windows Autorun file.
!:mime application/x-setupscript.

43
magic/wordprocessors Normal file
View file

@ -0,0 +1,43 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File: wordprocessors,v 1.16 2012/10/29 17:36:49 christos Exp $
# wordprocessors: file(1) magic fo word processors.
#
# Hangul (Korean) Word Processor File
# From: Won-Kyu Park <wkpark@kldp.org>
512 string R\0o\0o\0t\0 Hangul (Korean) Word Processor File 2000
!:mime application/x-hwp
# Quark Express from http://www.garykessler.net/library/file_sigs.html
2 string MMXPR3 Motorola Quark Express Document (English)
!:mime application/x-quark-xpress-3
#------------------------------------------------------------------------------
# ichitaro456: file(1) magic for Just System Word Processor Ichitaro
#
# Contributor kenzo-:
# Reversed-engineered JS Ichitaro magic numbers
#
0 string DOC
>43 byte 0x14 Just System Word Processor Ichitaro v4
!:mime application/x-ichitaro4
0 string DOC
>43 byte 0x15 Just System Word Processor Ichitaro v5
!:mime application/x-ichitaro5
0 string DOC
>43 byte 0x16 Just System Word Processor Ichitaro v6
!:mime application/x-ichitaro6
# Type: Freemind mindmap documents
# From: Jamie Thompson <debian-bugs@jamie-thompson.co.uk>
0 string/w \<map\ version Freemind document
!:mime application/x-freemind
# Type: Scribus
# From: Werner Fink <werner@suse.de>
0 string \<SCRIBUSUTF8NEW\ Version Scribus Document
!:mime application/x-scribus

11
magic/xwindows Normal file
View file

@ -0,0 +1,11 @@
# See COPYING file in this directory for original libmagic copyright.
#------------------------------------------------------------------------------
# $File: xwindows,v 1.7 2011/05/03 01:44:17 christos Exp $
# xwindows: file(1) magic for various X/Window system file formats.
# Xcursor data
# X11 mouse cursor format defined in libXcursor, see
# http://www.x.org/archive/X11R6.8.1/doc/Xcursor.3.html
# http://cgit.freedesktop.org/xorg/lib/libXcursor/tree/include/X11/Xcursor/Xcursor.h
0 string Xcur Xcursor data
!:mime image/x-xcursor

View file

@ -0,0 +1 @@
@load ./main.bro

View file

@ -0,0 +1,351 @@
##! An interface for driving the analysis of files, possibly independent of
##! any network protocol over which they're transported.
@load base/file_analysis.bif
@load base/frameworks/logging
module FileAnalysis;
export {
redef enum Log::ID += {
## Logging stream for file analysis.
LOG
};
## A structure which represents a desired type of file analysis.
type AnalyzerArgs: record {
## The type of analysis.
tag: Analyzer;
## The local filename to which to write an extracted file. Must be
## set when *tag* is :bro:see:`FileAnalysis::ANALYZER_EXTRACT`.
extract_filename: string &optional;
## An event which will be generated for all new file contents,
## chunk-wise.
chunk_event: event(f: fa_file, data: string, off: count) &optional;
## An event which will be generated for all new file contents,
## stream-wise.
stream_event: event(f: fa_file, data: string) &optional;
} &redef;
## Contains all metadata related to the analysis of a given file.
## For the most part, fields here are derived from ones of the same name
## in :bro:see:`fa_file`.
type Info: record {
## An identifier associated with a single file.
id: string &log;
## Identifier associated with a container file from which this one was
## extracted as part of the file analysis.
parent_id: string &log &optional;
## An identification of the source of the file data. E.g. it may be
## a network protocol over which it was transferred, or a local file
## path which was read, or some other input source.
source: string &log &optional;
## If the source of this file is is a network connection, this field
## may be set to indicate the directionality.
is_orig: bool &log &optional;
## The time at which the last activity for the file was seen.
last_active: time &log;
## Number of bytes provided to the file analysis engine for the file.
seen_bytes: count &log &default=0;
## Total number of bytes that are supposed to comprise the full file.
total_bytes: count &log &optional;
## The number of bytes in the file stream that were completely missed
## during the process of analysis e.g. due to dropped packets.
missing_bytes: count &log &default=0;
## The number of not all-in-sequence bytes in the file stream that
## were delivered to file analyzers due to reassembly buffer overflow.
overflow_bytes: count &log &default=0;
## The amount of time between receiving new data for this file that
## the analysis engine will wait before giving up on it.
timeout_interval: interval &log &optional;
## The number of bytes at the beginning of a file to save for later
## inspection in *bof_buffer* field.
bof_buffer_size: count &log &optional;
## A mime type provided by libmagic against the *bof_buffer*, or
## in the cases where no buffering of the beginning of file occurs,
## an initial guess of the mime type based on the first data seen.
mime_type: string &log &optional;
## Whether the file analysis timed out at least once for the file.
timedout: bool &log &default=F;
## Connection UIDS over which the file was transferred.
conn_uids: set[string] &log;
## A set of analysis types done during the file analysis.
analyzers: set[Analyzer] &log;
## Local filenames of extracted files.
extracted_files: set[string] &log;
## An MD5 digest of the file contents.
md5: string &log &optional;
## A SHA1 digest of the file contents.
sha1: string &log &optional;
## A SHA256 digest of the file contents.
sha256: string &log &optional;
} &redef;
## A table that can be used to disable file analysis completely for
## any files transferred over given network protocol analyzers.
const disable: table[AnalyzerTag] of bool = table() &redef;
## Event that can be handled to access the Info record as it is sent on
## to the logging framework.
global log_file_analysis: event(rec: Info);
## The salt concatenated to unique file handle strings generated by
## :bro:see:`get_file_handle` before hashing them in to a file id
## (the *id* field of :bro:see:`fa_file`).
## Provided to help mitigate the possiblility of manipulating parts of
## network connections that factor in to the file handle in order to
## generate two handles that would hash to the same file id.
const salt = "I recommend changing this." &redef;
## Sets the *timeout_interval* field of :bro:see:`fa_file`, which is
## used to determine the length of inactivity that is allowed for a file
## before internal state related to it is cleaned up.
##
## f: the file.
##
## t: the amount of time the file can remain inactive before discarding.
##
## Returns: true if the timeout interval was set, or false if analysis
## for the *id* isn't currently active.
global set_timeout_interval: function(f: fa_file, t: interval): bool;
## Postpones the timeout of file analysis for a given file.
## When used within a :bro:see:`file_timeout` handler for, the analysis
## the analysis will delay timing out for the period of time indicated by
## the *timeout_interval* field of :bro:see:`fa_file`, which can be set
## with :bro:see:`FileAnalysis::set_timeout_interval`.
##
## f: the file.
##
## Returns: true if the timeout will be postponed, or false if analysis
## for the *id* isn't currently active.
global postpone_timeout: function(f: fa_file): bool;
## Adds an analyzer to the analysis of a given file.
##
## f: the file.
##
## args: the analyzer type to add along with any arguments it takes.
##
## Returns: true if the analyzer will be added, or false if analysis
## for the *id* isn't currently active or the *args*
## were invalid for the analyzer type.
global add_analyzer: function(f: fa_file, args: AnalyzerArgs): bool;
## Removes an analyzer from the analysis of a given file.
##
## f: the file.
##
## args: the analyzer (type and args) to remove.
##
## Returns: true if the analyzer will be removed, or false if analysis
## for the *id* isn't currently active.
global remove_analyzer: function(f: fa_file, args: AnalyzerArgs): bool;
## Stops/ignores any further analysis of a given file.
##
## f: the file.
##
## Returns: true if analysis for the given file will be ignored for the
## rest of it's contents, or false if analysis for the *id*
## isn't currently active.
global stop: function(f: fa_file): bool;
## Sends a sequential stream of data in for file analysis.
## Meant for use when providing external file analysis input (e.g.
## from the input framework).
##
## source: a string that uniquely identifies the logical file that the
## data is a part of and describes its source.
##
## data: bytestring contents of the file to analyze.
global data_stream: function(source: string, data: string);
## Sends a non-sequential chunk of data in for file analysis.
## Meant for use when providing external file analysis input (e.g.
## from the input framework).
##
## source: a string that uniquely identifies the logical file that the
## data is a part of and describes its source.
##
## data: bytestring contents of the file to analyze.
##
## offset: the offset within the file that this chunk starts.
global data_chunk: function(source: string, data: string, offset: count);
## Signals a content gap in the file bytestream.
## Meant for use when providing external file analysis input (e.g.
## from the input framework).
##
## source: a string that uniquely identifies the logical file that the
## data is a part of and describes its source.
##
## offset: the offset within the file that this gap starts.
##
## len: the number of bytes that are missing.
global gap: function(source: string, offset: count, len: count);
## Signals the total size of a file.
## Meant for use when providing external file analysis input (e.g.
## from the input framework).
##
## source: a string that uniquely identifies the logical file that the
## data is a part of and describes its source.
##
## size: the number of bytes that comprise the full file.
global set_size: function(source: string, size: count);
## Signals the end of a file.
## Meant for use when providing external file analysis input (e.g.
## from the input framework).
##
## source: a string that uniquely identifies the logical file that the
## data is a part of and describes its source.
global eof: function(source: string);
}
redef record fa_file += {
info: Info &optional;
};
function set_info(f: fa_file)
{
if ( ! f?$info )
{
local tmp: Info;
f$info = tmp;
}
f$info$id = f$id;
if ( f?$parent_id ) f$info$parent_id = f$parent_id;
if ( f?$source ) f$info$source = f$source;
if ( f?$is_orig ) f$info$is_orig = f$is_orig;
f$info$last_active = f$last_active;
f$info$seen_bytes = f$seen_bytes;
if ( f?$total_bytes ) f$info$total_bytes = f$total_bytes;
f$info$missing_bytes = f$missing_bytes;
f$info$overflow_bytes = f$overflow_bytes;
f$info$timeout_interval = f$timeout_interval;
f$info$bof_buffer_size = f$bof_buffer_size;
if ( f?$mime_type ) f$info$mime_type = f$mime_type;
if ( f?$conns )
for ( cid in f$conns )
add f$info$conn_uids[f$conns[cid]$uid];
}
function set_timeout_interval(f: fa_file, t: interval): bool
{
return __set_timeout_interval(f$id, t);
}
function postpone_timeout(f: fa_file): bool
{
return __postpone_timeout(f$id);
}
function add_analyzer(f: fa_file, args: AnalyzerArgs): bool
{
if ( ! __add_analyzer(f$id, args) ) return F;
set_info(f);
add f$info$analyzers[args$tag];
if ( args$tag == FileAnalysis::ANALYZER_EXTRACT )
add f$info$extracted_files[args$extract_filename];
return T;
}
function remove_analyzer(f: fa_file, args: AnalyzerArgs): bool
{
return __remove_analyzer(f$id, args);
}
function stop(f: fa_file): bool
{
return __stop(f$id);
}
function data_stream(source: string, data: string)
{
__data_stream(source, data);
}
function data_chunk(source: string, data: string, offset: count)
{
__data_chunk(source, data, offset);
}
function gap(source: string, offset: count, len: count)
{
__gap(source, offset, len);
}
function set_size(source: string, size: count)
{
__set_size(source, size);
}
function eof(source: string)
{
__eof(source);
}
event bro_init() &priority=5
{
Log::create_stream(FileAnalysis::LOG,
[$columns=Info, $ev=log_file_analysis]);
}
event file_timeout(f: fa_file) &priority=5
{
set_info(f);
f$info$timedout = T;
}
event file_hash(f: fa_file, kind: string, hash: string) &priority=5
{
set_info(f);
switch ( kind ) {
case "md5":
f$info$md5 = hash;
break;
case "sha1":
f$info$sha1 = hash;
break;
case "sha256":
f$info$sha256 = hash;
break;
}
}
event file_state_remove(f: fa_file) &priority=5
{
set_info(f);
}
event file_state_remove(f: fa_file) &priority=-5
{
Log::write(FileAnalysis::LOG, f$info);
}

View file

@ -2,4 +2,5 @@
@load ./readers/ascii
@load ./readers/raw
@load ./readers/benchmark
@load ./readers/binary
@load ./readers/sqlite

View file

@ -0,0 +1,8 @@
##! Interface for the binary input reader.
module InputBinary;
export {
## Size of data chunks to read from the input file at a time.
const chunk_size = 1024 &redef;
}

View file

@ -0,0 +1,17 @@
##! Interface for the SQLite input reader.
##!
##! The defaults are set to match Bro's ASCII output.
module InputSQLite;
export {
## Separator between set elements.
## Please note that the separator has to be exactly one character long.
const set_separator = Input::set_separator &redef;
## String to use for an unset &optional field.
const unset_field = Input::unset_field &redef;
## String to use for empty fields.
const empty_field = Input::empty_field &redef;
}

View file

@ -2,5 +2,6 @@
@load ./postprocessors
@load ./writers/ascii
@load ./writers/dataseries
@load ./writers/sqlite
@load ./writers/elasticsearch
@load ./writers/none

View file

@ -189,6 +189,15 @@ export {
## .. bro:see:: Log::add_default_filter Log::remove_default_filter
global create_stream: function(id: ID, stream: Stream) : bool;
## Removes a logging stream completely, stopping all the threads.
##
## id: The ID enum to be associated with the new logging stream.
##
## Returns: True if a new stream was successfully removed.
##
## .. bro:see:: Log:create_stream
global remove_stream: function(id: ID) : bool;
## Enables a previously disabled logging stream. Disabled streams
## will not be written to until they are enabled again. New streams
## are enabled by default.
@ -442,6 +451,12 @@ function create_stream(id: ID, stream: Stream) : bool
return add_default_filter(id);
}
function remove_stream(id: ID) : bool
{
delete active_streams[id];
return __remove_stream(id);
}
function disable_stream(id: ID) : bool
{
delete active_streams[id];

View file

@ -0,0 +1,17 @@
##! Interface for the SQLite log writer. Redefinable options are available
##! to tweak the output format of the SQLite reader.
module LogSQLite;
export {
## Separator between set elements.
const set_separator = Log::set_separator &redef;
## String to use for an unset &optional field.
const unset_field = Log::unset_field &redef;
## String to use for empty fields. This should be different from
## *unset_field* to make the output non-ambigious.
const empty_field = Log::empty_field &redef;
}

View file

@ -113,7 +113,9 @@ event SumStats::send_data(uid: string, ss_name: string, data: ResultTable, clean
if ( |incoming_data| == 0 )
done = T;
event SumStats::cluster_ss_response(uid, ss_name, local_data, done);
# Note: copy is needed to compensate serialization caching issue. This should be
# changed to something else later.
event SumStats::cluster_ss_response(uid, ss_name, copy(local_data), done);
if ( ! done )
schedule 0.01 sec { SumStats::send_data(uid, ss_name, incoming_data, T) };
}
@ -139,7 +141,10 @@ event SumStats::cluster_key_request(uid: string, ss_name: string, key: Key, clea
if ( ss_name in result_store && key in result_store[ss_name] )
{
#print fmt("WORKER %s: received the cluster_key_request event for %s=%s.", Cluster::node, key2str(key), data);
event SumStats::cluster_key_response(uid, ss_name, key, result_store[ss_name][key], cleanup);
# Note: copy is needed to compensate serialization caching issue. This should be
# changed to something else later.
event SumStats::cluster_key_response(uid, ss_name, key, copy(result_store[ss_name][key]), cleanup);
}
else
{

View file

@ -1,4 +1,5 @@
@load ./average
@load ./last
@load ./max
@load ./min
@load ./sample

View file

@ -0,0 +1,58 @@
@load base/frameworks/sumstats
@load base/utils/queue
module SumStats;
export {
redef enum Calculation += {
## Keep last X observations in a queue
LAST
};
redef record Reducer += {
## number of elements to keep.
num_last_elements: count &default=0;
};
redef record ResultVal += {
## This is the queue where elements are maintained. Use the
## :bro:see:`SumStats::get_elements` function to get a vector of the current element values.
last_elements: Queue::Queue &optional;
};
## Get a vector of element values from a ResultVal.
global get_last: function(rv: ResultVal): vector of Observation;
}
function get_last(rv: ResultVal): vector of Observation
{
local s: vector of Observation = vector();
if ( rv?$last_elements )
Queue::get_vector(rv$last_elements, s);
return s;
}
hook register_observe_plugins()
{
register_observe_plugin(LAST, function(r: Reducer, val: double, obs: Observation, rv: ResultVal)
{
if ( r$num_last_elements > 0 )
{
if ( ! rv?$last_elements )
rv$last_elements = Queue::init([$max_len=r$num_last_elements]);
Queue::put(rv$last_elements, obs);
}
});
}
hook compose_resultvals_hook(result: ResultVal, rv1: ResultVal, rv2: ResultVal)
{
# Merge $samples
if ( rv1?$last_elements && rv2?$last_elements )
result$last_elements = Queue::merge(rv1$last_elements, rv2$last_elements);
else if ( rv1?$last_elements )
result$last_elements = rv1$last_elements;
else if ( rv2?$last_elements )
result$last_elements = rv2$last_elements;
}

View file

@ -1,54 +1,118 @@
@load base/utils/queue
@load ../main
@load base/frameworks/sumstats/main
module SumStats;
export {
redef enum Calculation += {
## Collect a sample of the last few observations.
## Get uniquely distributed random samples from the observation stream.
SAMPLE
};
redef record Reducer += {
## A number of sample Observations to collect.
samples: count &default=0;
num_samples: count &default=0;
};
redef record ResultVal += {
## This is the queue where samples are maintained. Use the
## :bro:see:`SumStats::get_samples` function to get a vector of the samples.
samples: Queue::Queue &optional;
};
## This is the vector in which the samples are maintained.
samples: vector of Observation &default=vector();
## Get a vector of sample Observation values from a ResultVal.
global get_samples: function(rv: ResultVal): vector of Observation;
## Number of total observed elements.
sample_elements: count &default=0;
};
}
function get_samples(rv: ResultVal): vector of Observation
redef record ResultVal += {
# Internal use only. This is not meant to be publically available
# and just a copy of num_samples from the Reducer. Needed for availability
# in the compose hook.
num_samples: count &default=0;
};
hook init_resultval_hook(r: Reducer, rv: ResultVal)
{
local s: vector of Observation = vector();
if ( rv?$samples )
Queue::get_vector(rv$samples, s);
return s;
if ( SAMPLE in r$apply )
rv$num_samples = r$num_samples;
}
function sample_add_sample(obs:Observation, rv: ResultVal)
{
++rv$sample_elements;
if ( |rv$samples| < rv$num_samples )
rv$samples[|rv$samples|] = obs;
else
{
local ra = rand(rv$sample_elements);
if ( ra < rv$num_samples )
rv$samples[ra] = obs;
}
}
hook register_observe_plugins()
{
register_observe_plugin(SAMPLE, function(r: Reducer, val: double, obs: Observation, rv: ResultVal)
{
if ( ! rv?$samples )
rv$samples = Queue::init([$max_len=r$samples]);
Queue::put(rv$samples, obs);
sample_add_sample(obs, rv);
});
}
hook compose_resultvals_hook(result: ResultVal, rv1: ResultVal, rv2: ResultVal)
{
# Merge $samples
if ( rv1?$samples && rv2?$samples )
result$samples = Queue::merge(rv1$samples, rv2$samples);
else if ( rv1?$samples )
result$samples = rv1$samples;
else if ( rv2?$samples )
result$samples = rv2$samples;
if ( rv1$num_samples != rv2$num_samples )
{
Reporter::error("Merging sample sets with differing sizes is not supported");
return;
}
local num_samples = rv1$num_samples;
result$num_samples = num_samples;
if ( |rv1$samples| > num_samples || |rv2$samples| > num_samples )
{
Reporter::error("Sample vector with too many elements. Aborting.");
return;
}
if ( |rv1$samples| != num_samples && |rv2$samples| < num_samples )
{
if ( |rv1$samples| != rv1$sample_elements || |rv2$samples| < rv2$sample_elements )
{
Reporter::error("Mismatch in sample element size and tracking. Aborting merge");
return;
}
for ( i in rv1$samples )
sample_add_sample(rv1$samples[i], result);
for ( i in rv2$samples)
sample_add_sample(rv2$samples[i], result);
}
else
{
local other_vector: vector of Observation;
local othercount: count;
if ( rv1$sample_elements > rv2$sample_elements )
{
result$samples = copy(rv1$samples);
other_vector = rv2$samples;
othercount = rv2$sample_elements;
}
else
{
result$samples = copy(rv2$samples);
other_vector = rv1$samples;
othercount = rv1$sample_elements;
}
local totalcount = rv1$sample_elements + rv2$sample_elements;
result$sample_elements = totalcount;
for ( i in other_vector )
{
if ( rand(totalcount) <= othercount )
result$samples[i] = other_vector[i];
}
}
}

View file

@ -39,14 +39,14 @@ hook compose_resultvals_hook(result: ResultVal, rv1: ResultVal, rv2: ResultVal)
if ( rv1?$unique_vals || rv2?$unique_vals )
{
if ( rv1?$unique_vals )
result$unique_vals = rv1$unique_vals;
result$unique_vals = copy(rv1$unique_vals);
if ( rv2?$unique_vals )
if ( ! result?$unique_vals )
result$unique_vals = rv2$unique_vals;
result$unique_vals = copy(rv2$unique_vals);
else
for ( val2 in rv2$unique_vals )
add result$unique_vals[val2];
add result$unique_vals[copy(val2)];
result$unique = |result$unique_vals|;
}

View file

@ -316,6 +316,73 @@ type connection: record {
tunnel: EncapsulatingConnVector &optional;
};
## Default amount of time a file can be inactive before the file analysis
## gives up and discards any internal state related to the file.
const default_file_timeout_interval: interval = 2 mins &redef;
## Default amount of bytes that file analysis will buffer before raising
## :bro:see:`file_new`.
const default_file_bof_buffer_size: count = 1024 &redef;
## A file that Bro is analyzing. This is Bro's type for describing the basic
## internal metadata collected about a "file", which is essentially just a
## byte stream that is e.g. pulled from a network connection or possibly
## some other input source.
type fa_file: record {
## An identifier associated with a single file.
id: string;
## Identifier associated with a container file from which this one was
## extracted as part of the file analysis.
parent_id: string &optional;
## An identification of the source of the file data. E.g. it may be
## a network protocol over which it was transferred, or a local file
## path which was read, or some other input source.
source: string &optional;
## If the source of this file is is a network connection, this field
## may be set to indicate the directionality.
is_orig: bool &optional;
## The set of connections over which the file was transferred.
conns: table[conn_id] of connection &optional;
## The time at which the last activity for the file was seen.
last_active: time;
## Number of bytes provided to the file analysis engine for the file.
seen_bytes: count &default=0;
## Total number of bytes that are supposed to comprise the full file.
total_bytes: count &optional;
## The number of bytes in the file stream that were completely missed
## during the process of analysis e.g. due to dropped packets.
missing_bytes: count &default=0;
## The number of not all-in-sequence bytes in the file stream that
## were delivered to file analyzers due to reassembly buffer overflow.
overflow_bytes: count &default=0;
## The amount of time between receiving new data for this file that
## the analysis engine will wait before giving up on it.
timeout_interval: interval &default=default_file_timeout_interval;
## The number of bytes at the beginning of a file to save for later
## inspection in *bof_buffer* field.
bof_buffer_size: count &default=default_file_bof_buffer_size;
## The content of the beginning of a file up to *bof_buffer_size* bytes.
## This is also the buffer that's used for file/mime type detection.
bof_buffer: string &optional;
## A mime type provided by libmagic against the *bof_buffer*, or
## in the cases where no buffering of the beginning of file occurs,
## an initial guess of the mime type based on the first data seen.
mime_type: string &optional;
} &redef;
## Fields of a SYN packet.
##
## .. bro:see:: connection_SYN_packet
@ -3034,3 +3101,4 @@ const snaplen = 8192 &redef;
@load base/frameworks/input
@load base/frameworks/file-analysis

View file

@ -1,6 +1,7 @@
##! Base DNS analysis script which tracks and logs DNS queries along with
##! their responses.
@load base/utils/queue
@load ./consts
module DNS;
@ -73,19 +74,6 @@ export {
total_replies: count &optional;
};
## A record type which tracks the status of DNS queries for a given
## :bro:type:`connection`.
type State: record {
## Indexed by query id, returns Info record corresponding to
## query/response which haven't completed yet.
pending: table[count] of Info &optional;
## This is the list of DNS responses that have completed based on the
## number of responses declared and the number received. The contents
## of the set are transaction IDs.
finished_answers: set[count] &optional;
};
## An event that can be handled to access the :bro:type:`DNS::Info`
## record as it is sent to the logging framework.
global log_dns: event(rec: Info);
@ -102,8 +90,32 @@ export {
##
## reply: The specific response information according to RR type/class.
global do_reply: event(c: connection, msg: dns_msg, ans: dns_answer, reply: string);
## A hook that is called whenever a session is being set.
## This can be used if additional initialization logic needs to happen
## when creating a new session value.
##
## c: The connection involved in the new session
##
## msg: The DNS message header information.
##
## is_query: Indicator for if this is being called for a query or a response.
global set_session: hook(c: connection, msg: dns_msg, is_query: bool);
}
## A record type which tracks the status of DNS queries for a given
## :bro:type:`connection`.
type State: record {
## Indexed by query id, returns Info record corresponding to
## query/response which haven't completed yet.
pending: table[count] of Queue::Queue;
## This is the list of DNS responses that have completed based on the
## number of responses declared and the number received. The contents
## of the set are transaction IDs.
finished_answers: set[count];
};
redef record connection += {
dns: Info &optional;
dns_state: State &optional;
@ -134,14 +146,6 @@ event bro_init() &priority=5
function new_session(c: connection, trans_id: count): Info
{
if ( ! c?$dns_state )
{
local state: State;
state$pending=table();
state$finished_answers=set();
c$dns_state = state;
}
local info: Info;
info$ts = network_time();
info$id = c$id;
@ -151,18 +155,37 @@ function new_session(c: connection, trans_id: count): Info
return info;
}
function set_session(c: connection, msg: dns_msg, is_query: bool)
hook set_session(c: connection, msg: dns_msg, is_query: bool) &priority=5
{
if ( ! c?$dns_state || msg$id !in c$dns_state$pending )
if ( ! c?$dns_state )
{
c$dns_state$pending[msg$id] = new_session(c, msg$id);
# Try deleting this transaction id from the set of finished answers.
# Sometimes hosts will reuse ports and transaction ids and this should
# be considered to be a legit scenario (although bad practice).
delete c$dns_state$finished_answers[msg$id];
local state: State;
c$dns_state = state;
}
c$dns = c$dns_state$pending[msg$id];
if ( msg$id !in c$dns_state$pending )
c$dns_state$pending[msg$id] = Queue::init();
local info: Info;
# If this is either a query or this is the reply but
# no Info records are in the queue (we missed the query?)
# we need to create an Info record and put it in the queue.
if ( is_query ||
Queue::len(c$dns_state$pending[msg$id]) == 0 )
{
info = new_session(c, msg$id);
Queue::put(c$dns_state$pending[msg$id], info);
}
if ( is_query )
# If this is a query, assign the newly created info variable
# so that the world looks correct to anything else handling
# this query.
c$dns = info;
else
# Peek at the next item in the queue for this trans_id and
# assign it to c$dns since this is a response.
c$dns = Queue::peek(c$dns_state$pending[msg$id]);
if ( ! is_query )
{
@ -190,7 +213,7 @@ function set_session(c: connection, msg: dns_msg, is_query: bool)
event dns_message(c: connection, is_orig: bool, msg: dns_msg, len: count) &priority=5
{
set_session(c, msg, is_orig);
hook set_session(c, msg, is_orig);
}
event DNS::do_reply(c: connection, msg: dns_msg, ans: dns_answer, reply: string) &priority=5
@ -200,9 +223,6 @@ event DNS::do_reply(c: connection, msg: dns_msg, ans: dns_answer, reply: string)
c$dns$AA = msg$AA;
c$dns$RA = msg$RA;
if ( msg$id in c$dns_state$finished_answers )
event conn_weird("dns_reply_seen_after_done", c, "");
if ( reply != "" )
{
if ( ! c$dns?$answers )
@ -217,7 +237,6 @@ event DNS::do_reply(c: connection, msg: dns_msg, ans: dns_answer, reply: string)
if ( c$dns?$answers && c$dns?$total_answers &&
|c$dns$answers| == c$dns$total_answers )
{
add c$dns_state$finished_answers[c$dns$trans_id];
# Indicate this request/reply pair is ready to be logged.
c$dns$ready = T;
}
@ -230,7 +249,7 @@ event DNS::do_reply(c: connection, msg: dns_msg, ans: dns_answer, reply: string)
{
Log::write(DNS::LOG, c$dns);
# This record is logged and no longer pending.
delete c$dns_state$pending[c$dns$trans_id];
Queue::get(c$dns_state$pending[c$dns$trans_id]);
delete c$dns;
}
}
@ -243,6 +262,7 @@ event dns_request(c: connection, msg: dns_msg, query: string, qtype: count, qcla
c$dns$qclass_name = classes[qclass];
c$dns$qtype = qtype;
c$dns$qtype_name = query_types[qtype];
c$dns$Z = msg$Z;
# Decode netbios name queries
# Note: I'm ignoring the name type for now. Not sure if this should be
@ -250,8 +270,6 @@ event dns_request(c: connection, msg: dns_msg, query: string, qtype: count, qcla
if ( c$id$resp_p == 137/udp )
query = decode_netbios_name(query);
c$dns$query = query;
c$dns$Z = msg$Z;
}
event dns_A_reply(c: connection, msg: dns_msg, ans: dns_answer, a: addr) &priority=5
@ -339,6 +357,13 @@ event connection_state_remove(c: connection) &priority=-5
# If Bro is expiring state, we should go ahead and log all unlogged
# request/response pairs now.
for ( trans_id in c$dns_state$pending )
Log::write(DNS::LOG, c$dns_state$pending[trans_id]);
{
local infos: vector of Info;
Queue::get_vector(c$dns_state$pending[trans_id], infos);
for ( i in infos )
{
Log::write(DNS::LOG, infos[i]);
}
}
}

View file

@ -1,4 +1,5 @@
@load ./utils-commands
@load ./main
@load ./file-analysis
@load ./file-extract
@load ./gridftp

View file

@ -0,0 +1,47 @@
@load ./main
@load base/utils/conn-ids
@load base/frameworks/file-analysis/main
module FTP;
export {
## Default file handle provider for FTP.
global get_file_handle: function(c: connection, is_orig: bool): string;
}
function get_handle_string(c: connection): string
{
return cat(ANALYZER_FTP_DATA, " ", c$start_time, " ", id_string(c$id));
}
function get_file_handle(c: connection, is_orig: bool): string
{
if ( [c$id$resp_h, c$id$resp_p] !in ftp_data_expected ) return "";
local info: FTP::Info = ftp_data_expected[c$id$resp_h, c$id$resp_p];
if ( info$passive )
# FTP client initiates data channel.
if ( is_orig )
# Don't care about FTP client data.
return "";
else
# Do care about FTP server data.
return get_handle_string(c);
else
# FTP server initiates dta channel.
if ( is_orig )
# Do care about FTP server data.
return get_handle_string(c);
else
# Don't care about FTP client data.
return "";
}
module GLOBAL;
event get_file_handle(tag: AnalyzerTag, c: connection, is_orig: bool)
{
if ( tag != ANALYZER_FTP_DATA ) return;
set_file_handle(FTP::get_file_handle(c, is_orig));
}

View file

@ -13,53 +13,76 @@ export {
const extraction_prefix = "ftp-item" &redef;
}
global extract_count: count = 0;
redef record Info += {
## On disk file where it was extracted to.
extraction_file: file &log &optional;
extraction_file: string &log &optional;
## Indicates if the current command/response pair should attempt to
## extract the file if a file was transferred.
extract_file: bool &default=F;
## Internal tracking of the total number of files extracted during this
## session.
num_extracted_files: count &default=0;
};
event file_transferred(c: connection, prefix: string, descr: string,
mime_type: string) &priority=3
function get_extraction_name(f: fa_file): string
{
local id = c$id;
if ( [id$resp_h, id$resp_p] !in ftp_data_expected )
local r = fmt("%s-%s-%d.dat", extraction_prefix, f$id, extract_count);
++extract_count;
return r;
}
event file_new(f: fa_file) &priority=5
{
if ( ! f?$source ) return;
if ( f$source != "FTP_DATA" ) return;
if ( f?$mime_type && extract_file_types in f$mime_type )
{
FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_EXTRACT,
$extract_filename=get_extraction_name(f)]);
return;
}
local s = ftp_data_expected[id$resp_h, id$resp_p];
if ( ! f?$conns ) return;
if ( extract_file_types in s$mime_type )
for ( cid in f$conns )
{
s$extract_file = T;
++s$num_extracted_files;
local c: connection = f$conns[cid];
if ( [cid$resp_h, cid$resp_p] !in ftp_data_expected ) next;
local s = ftp_data_expected[cid$resp_h, cid$resp_p];
if ( ! s$extract_file ) next;
FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_EXTRACT,
$extract_filename=get_extraction_name(f)]);
return;
}
}
event file_transferred(c: connection, prefix: string, descr: string,
mime_type: string) &priority=-4
event file_state_remove(f: fa_file) &priority=4
{
local id = c$id;
if ( [id$resp_h, id$resp_p] !in ftp_data_expected )
return;
if ( ! f?$source ) return;
if ( f$source != "FTP_DATA" ) return;
if ( ! f?$info ) return;
local s = ftp_data_expected[id$resp_h, id$resp_p];
if ( s$extract_file )
for ( filename in f$info$extracted_files )
{
local suffix = fmt("%d.dat", s$num_extracted_files);
local fname = generate_extraction_filename(extraction_prefix, c, suffix);
s$extraction_file = open(fname);
if ( s$passive )
set_contents_file(id, CONTENTS_RESP, s$extraction_file);
else
set_contents_file(id, CONTENTS_ORIG, s$extraction_file);
local s: FTP::Info;
s$ts = network_time();
s$tags = set();
s$user = "<ftp-data>";
s$extraction_file = filename;
if ( f?$conns )
for ( cid in f$conns )
{
s$uid = f$conns[cid]$uid;
s$id = cid;
}
Log::write(FTP::LOG, s);
}
}

View file

@ -16,7 +16,8 @@ export {
## List of commands that should have their command/response pairs logged.
const logged_commands = {
"APPE", "DELE", "RETR", "STOR", "STOU", "ACCT"
"APPE", "DELE", "RETR", "STOR", "STOU", "ACCT", "PORT", "PASV", "EPRT",
"EPSV"
} &redef;
## This setting changes if passwords used in FTP sessions are captured or not.
@ -25,6 +26,18 @@ export {
## User IDs that can be considered "anonymous".
const guest_ids = { "anonymous", "ftp", "ftpuser", "guest" } &redef;
## The expected endpoints of an FTP data channel.
type ExpectedDataChannel: record {
## Whether PASV mode is toggled for control channel.
passive: bool &log;
## The host that will be initiating the data connection.
orig_h: addr &log;
## The host that will be accepting the data connection.
resp_h: addr &log;
## The port at which the acceptor is listening for the data connection.
resp_p: port &log;
};
type Info: record {
## Time when the command was sent.
ts: time &log;
@ -43,8 +56,6 @@ export {
## Libmagic "sniffed" file type if the command indicates a file transfer.
mime_type: string &log &optional;
## Libmagic "sniffed" file description if the command indicates a file transfer.
mime_desc: string &log &optional;
## Size of the file if the command indicates a file transfer.
file_size: count &log &optional;
@ -53,7 +64,10 @@ export {
## Reply message from the server in response to the command.
reply_msg: string &log &optional;
## Arbitrary tags that may indicate a particular attribute of this command.
tags: set[string] &log &default=set();
tags: set[string] &log;
## Expected FTP data channel.
data_channel: ExpectedDataChannel &log &optional;
## Current working directory that this session is in. By making
## the default value '.', we can indicate that unless something
@ -93,6 +107,7 @@ export {
# Add the state tracking information variable to the connection record
redef record connection += {
ftp: Info &optional;
ftp_data_reuse: bool &default=F;
};
# Configure DPD
@ -103,7 +118,7 @@ redef dpd_config += { [ANALYZER_FTP] = [$ports = ports] };
redef likely_server_ports += { 21/tcp, 2811/tcp };
# Establish the variable for tracking expected connections.
global ftp_data_expected: table[addr, port] of Info &create_expire=5mins;
global ftp_data_expected: table[addr, port] of Info &read_expire=5mins;
event bro_init() &priority=5
{
@ -194,10 +209,20 @@ function ftp_message(s: Info)
# and may not be used in all commands so they need reset to "blank"
# values after logging.
delete s$mime_type;
delete s$mime_desc;
delete s$file_size;
# Same with data channel.
delete s$data_channel;
# Tags are cleared everytime too.
delete s$tags;
s$tags = set();
}
function add_expected_data_channel(s: Info, chan: ExpectedDataChannel)
{
s$passive = chan$passive;
s$data_channel = chan;
ftp_data_expected[chan$resp_h, chan$resp_p] = s;
expect_connection(chan$orig_h, chan$resp_h, chan$resp_p, ANALYZER_FTP_DATA,
5mins);
}
event ftp_request(c: connection, command: string, arg: string) &priority=5
@ -232,9 +257,8 @@ event ftp_request(c: connection, command: string, arg: string) &priority=5
if ( data$valid )
{
c$ftp$passive=F;
ftp_data_expected[data$h, data$p] = c$ftp;
expect_connection(id$resp_h, data$h, data$p, ANALYZER_FILE, 5mins);
add_expected_data_channel(c$ftp, [$passive=F, $orig_h=id$resp_h,
$resp_h=data$h, $resp_p=data$p]);
}
else
{
@ -283,8 +307,8 @@ event ftp_reply(c: connection, code: count, msg: string, cont_resp: bool) &prior
if ( code == 229 && data$h == [::] )
data$h = c$id$resp_h;
ftp_data_expected[data$h, data$p] = c$ftp;
expect_connection(c$id$orig_h, data$h, data$p, ANALYZER_FILE, 5mins);
add_expected_data_channel(c$ftp, [$passive=T, $orig_h=c$id$orig_h,
$resp_h=data$h, $resp_p=data$p]);
}
else
{
@ -314,7 +338,6 @@ event ftp_reply(c: connection, code: count, msg: string, cont_resp: bool) &prior
}
}
event expected_connection_seen(c: connection, a: count) &priority=10
{
local id = c$id;
@ -330,16 +353,19 @@ event file_transferred(c: connection, prefix: string, descr: string,
{
local s = ftp_data_expected[id$resp_h, id$resp_p];
s$mime_type = split1(mime_type, /;/)[1];
s$mime_desc = descr;
}
}
event file_transferred(c: connection, prefix: string, descr: string,
mime_type: string) &priority=-5
event connection_reused(c: connection) &priority=5
{
local id = c$id;
if ( [id$resp_h, id$resp_p] in ftp_data_expected )
delete ftp_data_expected[id$resp_h, id$resp_p];
if ( "ftp-data" in c$service )
c$ftp_data_reuse = T;
}
event connection_state_remove(c: connection) &priority=-5
{
if ( c$ftp_data_reuse ) return;
delete ftp_data_expected[c$id$resp_h, c$id$resp_p];
}
# Use state remove event to cover connections terminated by RST.

View file

@ -1,5 +1,6 @@
@load ./main
@load ./utils
@load ./file-analysis
@load ./file-ident
@load ./file-hash
@load ./file-extract

View file

@ -0,0 +1,31 @@
@load ./main
@load ./utils
@load base/utils/conn-ids
@load base/frameworks/file-analysis/main
module HTTP;
export {
## Default file handle provider for HTTP.
global get_file_handle: function(c: connection, is_orig: bool): string;
}
function get_file_handle(c: connection, is_orig: bool): string
{
if ( ! c?$http ) return "";
if ( c$http$range_request )
return cat(ANALYZER_HTTP, " ", is_orig, " ", c$id$orig_h, " ",
build_url(c$http));
return cat(ANALYZER_HTTP, " ", c$start_time, " ", is_orig, " ",
c$http$trans_depth, " ", id_string(c$id));
}
module GLOBAL;
event get_file_handle(tag: AnalyzerTag, c: connection, is_orig: bool)
{
if ( tag != ANALYZER_HTTP ) return;
set_file_handle(HTTP::get_file_handle(c, is_orig));
}

View file

@ -2,8 +2,7 @@
##! the message body from the server can be extracted with this script.
@load ./main
@load ./file-ident
@load base/utils/files
@load ./file-analysis
module HTTP;
@ -16,45 +15,70 @@ export {
redef record Info += {
## On-disk file where the response body was extracted to.
extraction_file: file &log &optional;
extraction_file: string &log &optional;
## Indicates if the response body is to be extracted or not. Must be
## set before or by the first :bro:id:`http_entity_data` event for the
## content.
## set before or by the first :bro:see:`file_new` for the file content.
extract_file: bool &default=F;
};
}
event http_entity_data(c: connection, is_orig: bool, length: count, data: string) &priority=-5
global extract_count: count = 0;
function get_extraction_name(f: fa_file): string
{
# Client body extraction is not currently supported in this script.
if ( is_orig )
local r = fmt("%s-%s-%d.dat", extraction_prefix, f$id, extract_count);
++extract_count;
return r;
}
event file_new(f: fa_file) &priority=5
{
if ( ! f?$source ) return;
if ( f$source != "HTTP" ) return;
if ( ! f?$conns ) return;
local fname: string;
local c: connection;
if ( f?$mime_type && extract_file_types in f$mime_type )
{
fname = get_extraction_name(f);
FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_EXTRACT,
$extract_filename=fname]);
for ( cid in f$conns )
{
c = f$conns[cid];
if ( ! c?$http ) next;
c$http$extraction_file = fname;
}
return;
}
if ( c$http$first_chunk )
local extracting: bool = F;
for ( cid in f$conns )
{
if ( c$http?$mime_type &&
extract_file_types in c$http$mime_type )
c = f$conns[cid];
if ( ! c?$http ) next;
if ( ! c$http$extract_file ) next;
fname = get_extraction_name(f);
FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_EXTRACT,
$extract_filename=fname]);
extracting = T;
break;
}
if ( extracting )
for ( cid in f$conns )
{
c$http$extract_file = T;
}
if ( c$http$extract_file )
{
local suffix = fmt("%s_%d.dat", is_orig ? "orig" : "resp", c$http_state$current_response);
local fname = generate_extraction_filename(extraction_prefix, c, suffix);
c$http$extraction_file = open(fname);
enable_raw_output(c$http$extraction_file);
c = f$conns[cid];
if ( ! c?$http ) next;
c$http$extraction_file = fname;
}
}
if ( c$http?$extraction_file )
print c$http$extraction_file, data;
}
event http_end_entity(c: connection, is_orig: bool)
{
if ( c$http?$extraction_file )
close(c$http$extraction_file);
}

View file

@ -1,15 +1,11 @@
##! Calculate hashes for HTTP body transfers.
@load ./file-ident
@load ./main
@load ./file-analysis
module HTTP;
export {
redef enum Notice::Type += {
## Indicates that an MD5 sum was calculated for an HTTP response body.
MD5,
};
redef record Info += {
## MD5 sum for a file transferred over HTTP calculated from the
## response body.
@ -19,10 +15,6 @@ export {
## if a file should have an MD5 sum generated. It must be
## set to T at the time of or before the first chunk of body data.
calc_md5: bool &default=F;
## Indicates if an MD5 sum is being calculated for the current
## request/response pair.
md5_handle: opaque of md5 &optional;
};
## Generate MD5 sums for these filetypes.
@ -31,62 +23,46 @@ export {
&redef;
}
## Initialize and calculate the hash.
event http_entity_data(c: connection, is_orig: bool, length: count, data: string) &priority=5
event file_new(f: fa_file) &priority=5
{
if ( is_orig || ! c?$http ) return;
if ( ! f?$source ) return;
if ( f$source != "HTTP" ) return;
if ( c$http$first_chunk )
if ( f?$mime_type && generate_md5 in f$mime_type )
{
if ( c$http$calc_md5 ||
(c$http?$mime_type && generate_md5 in c$http$mime_type) )
FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_MD5]);
return;
}
if ( ! f?$conns ) return;
for ( cid in f$conns )
{
c$http$md5_handle = md5_hash_init();
local c: connection = f$conns[cid];
if ( ! c?$http ) next;
if ( ! c$http$calc_md5 ) next;
FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_MD5]);
return;
}
}
if ( c$http?$md5_handle )
md5_hash_update(c$http$md5_handle, data);
}
## In the event of a content gap during a file transfer, detect the state for
## the MD5 sum calculation and stop calculating the MD5 since it would be
## incorrect anyway.
event content_gap(c: connection, is_orig: bool, seq: count, length: count) &priority=5
event file_state_remove(f: fa_file) &priority=4
{
if ( is_orig || ! c?$http || ! c$http?$md5_handle ) return;
if ( ! f?$source ) return;
if ( f$source != "HTTP" ) return;
if ( ! f?$conns ) return;
if ( ! f?$info ) return;
if ( ! f$info?$md5 ) return;
set_state(c, F, is_orig);
md5_hash_finish(c$http$md5_handle); # Ignore return value.
delete c$http$md5_handle;
}
## When the file finishes downloading, finish the hash and generate a notice.
event http_message_done(c: connection, is_orig: bool, stat: http_message_stat) &priority=-3
for ( cid in f$conns )
{
if ( is_orig || ! c?$http ) return;
local c: connection = f$conns[cid];
if ( c$http?$md5_handle )
{
local url = build_url_http(c$http);
c$http$md5 = md5_hash_finish(c$http$md5_handle);
delete c$http$md5_handle;
if ( ! c?$http ) next;
NOTICE([$note=MD5, $msg=fmt("%s %s %s", c$id$orig_h, c$http$md5, url),
$sub=c$http$md5, $conn=c]);
}
}
event connection_state_remove(c: connection) &priority=-5
{
if ( c?$http_state &&
c$http_state$current_response in c$http_state$pending &&
c$http_state$pending[c$http_state$current_response]?$md5_handle )
{
# The MD5 sum isn't going to be saved anywhere since the entire
# body wouldn't have been seen anyway and we'd just be giving an
# incorrect MD5 sum.
md5_hash_finish(c$http$md5_handle);
delete c$http$md5_handle;
c$http$md5 = f$info$md5;
}
}

View file

@ -1,37 +1,28 @@
##! Identification of file types in HTTP response bodies with file content sniffing.
@load base/frameworks/signatures
@load base/frameworks/notice
@load ./main
@load ./utils
# Add the magic number signatures to the core signature set.
@load-sigs ./file-ident.sig
# Ignore the signatures used to match files
redef Signatures::ignored_ids += /^matchfile-/;
@load ./file-analysis
module HTTP;
export {
redef enum Notice::Type += {
## Indicates when the file extension doesn't seem to match the file contents.
## Indicates when the file extension doesn't seem to match the file
## contents.
Incorrect_File_Type,
};
redef record Info += {
## Mime type of response body identified by content sniffing.
mime_type: string &log &optional;
## Indicates that no data of the current file transfer has been
## seen yet. After the first :bro:id:`http_entity_data` event, it
## will be set to F.
first_chunk: bool &default=T;
};
## Mapping between mime types and regular expressions for URLs
## The :bro:enum:`HTTP::Incorrect_File_Type` notice is generated if the pattern
## doesn't match the mime type that was discovered.
## Mapping between mime type strings (without character set) and
## regular expressions for URLs.
## The :bro:enum:`HTTP::Incorrect_File_Type` notice is generated if the
## pattern doesn't match the mime type that was discovered.
const mime_types_extensions: table[string] of pattern = {
["application/x-dosexec"] = /\.([eE][xX][eE]|[dD][lL][lL])/,
} &redef;
@ -43,43 +34,72 @@ export {
const ignored_incorrect_file_type_urls = /^$/ &redef;
}
event signature_match(state: signature_state, msg: string, data: string) &priority=5
event file_new(f: fa_file) &priority=5
{
# Only signatures matching file types are dealt with here.
if ( /^matchfile-/ !in state$sig_id ) return;
if ( ! f?$source ) return;
if ( f$source != "HTTP" ) return;
if ( ! f?$mime_type ) return;
if ( ! f?$conns ) return;
local c = state$conn;
set_state(c, F, F);
# Not much point in any of this if we don't know about the HTTP session.
if ( ! c?$http ) return;
# Set the mime type that was detected.
c$http$mime_type = msg;
if ( msg in mime_types_extensions &&
c$http?$uri && mime_types_extensions[msg] !in c$http$uri )
for ( cid in f$conns )
{
local c: connection = f$conns[cid];
if ( ! c?$http ) next;
c$http$mime_type = f$mime_type;
local mime_str: string = c$http$mime_type;
if ( mime_str !in mime_types_extensions ) next;
if ( ! c$http?$uri ) next;
if ( mime_types_extensions[mime_str] in c$http$uri ) next;
local url = build_url_http(c$http);
if ( url == ignored_incorrect_file_type_urls )
return;
if ( url == ignored_incorrect_file_type_urls ) next;
local message = fmt("%s %s %s", msg, c$http$method, url);
local message = fmt("%s %s %s", mime_str, c$http$method, url);
NOTICE([$note=Incorrect_File_Type,
$msg=message,
$conn=c]);
}
}
event http_entity_data(c: connection, is_orig: bool, length: count, data: string) &priority=5
event file_over_new_connection(f: fa_file, c: connection) &priority=5
{
if ( c$http$first_chunk && ! c$http?$mime_type )
c$http$mime_type = split1(identify_data(data, T), /;/)[1];
if ( ! f?$source ) return;
if ( f$source != "HTTP" ) return;
if ( ! f?$mime_type ) return;
if ( ! c?$http ) return;
# Spread the mime around (e.g. for partial content, file_type event only
# happens once for the first connection, but if there's subsequent
# connections to transfer the same file, they'll be lacking the mime_type
# field if we don't do this).
c$http$mime_type = f$mime_type;
}
event http_entity_data(c: connection, is_orig: bool, length: count, data: string) &priority=-10
# Tracks byte-range request / partial content response mime types, indexed
# by [connection, uri] pairs. This is needed because a person can pipeline
# byte-range requests over multiple connections to the same uri. Without
# the tracking, only the first request in the pipeline for each connection
# would get a mime_type field assigned to it (by the FileAnalysis policy hooks).
global partial_types: table[conn_id, string] of string &read_expire=5mins;
# Priority 4 so that it runs before the handler that will write to http.log.
event http_message_done(c: connection, is_orig: bool, stat: http_message_stat)
&priority=4
{
if ( c$http$first_chunk )
c$http$first_chunk=F;
if ( ! c$http$range_request ) return;
if ( ! c$http?$uri ) return;
if ( c$http?$mime_type )
{
partial_types[c$id, c$http$uri] = c$http$mime_type;
return;
}
if ( [c$id, c$http$uri] in partial_types )
c$http$mime_type = partial_types[c$id, c$http$uri];
}

View file

@ -1,144 +0,0 @@
# These signatures are used as a replacement for libmagic. The signature
# name needs to start with "matchfile" and the "event" directive takes
# the mime type of the file matched by the http-reply-body pattern.
#
# Signatures from: http://www.garykessler.net/library/file_sigs.html
signature matchfile-exe {
http-reply-body /\x4D\x5A/
event "application/x-dosexec"
}
signature matchfile-elf {
http-reply-body /\x7F\x45\x4C\x46/
event "application/x-executable"
}
signature matchfile-script {
# This is meant to match the interpreter declaration at the top of many
# interpreted scripts.
http-reply-body /\#\![[:blank:]]?\//
event "application/x-script"
}
signature matchfile-wmv {
http-reply-body /\x30\x26\xB2\x75\x8E\x66\xCF\x11\xA6\xD9\x00\xAA\x00\x62\xCE\x6C/
event "video/x-ms-wmv"
}
signature matchfile-flv {
http-reply-body /\x46\x4C\x56\x01/
event "video/x-flv"
}
signature matchfile-swf {
http-reply-body /[\x46\x43]\x57\x53/
event "application/x-shockwave-flash"
}
signature matchfile-jar {
http-reply-body /\x5F\x27\xA8\x89/
event "application/java-archive"
}
signature matchfile-class {
http-reply-body /\xCA\xFE\xBA\xBE/
event "application/java-byte-code"
}
signature matchfile-msoffice-2007 {
# MS Office 2007 XML documents
http-reply-body /\x50\x4B\x03\x04\x14\x00\x06\x00/
event "application/msoffice"
}
signature matchfile-msoffice {
# Older MS Office files
http-reply-body /\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1/
event "application/msoffice"
}
signature matchfile-rtf {
http-reply-body /\x7B\x5C\x72\x74\x66\x31/
event "application/rtf"
}
signature matchfile-lnk {
http-reply-body /\x4C\x00\x00\x00\x01\x14\x02\x00\x00\x00\x00\x00\xC0\x00\x00\x00\x00\x00\x00\x46/
event "application/x-ms-shortcut"
}
signature matchfile-torrent {
http-reply-body /\x64\x38\x3A\x61\x6E\x6E\x6F\x75\x6E\x63\x65/
event "application/x-bittorrent"
}
signature matchfile-pdf {
http-reply-body /\x25\x50\x44\x46/
event "application/pdf"
}
signature matchfile-html {
http-reply-body /<[hH][tT][mM][lL]/
event "text/html"
}
signature matchfile-html2 {
http-reply-body /<![dD][oO][cC][tT][yY][pP][eE][[:blank:]][hH][tT][mM][lL]/
event "text/html"
}
signature matchfile-xml {
http-reply-body /<\??[xX][mM][lL]/
event "text/xml"
}
signature matchfile-gif {
http-reply-body /\x47\x49\x46\x38[\x37\x39]\x61/
event "image/gif"
}
signature matchfile-jpg {
http-reply-body /\xFF\xD8\xFF[\xDB\xE0\xE1\xE2\xE3\xE8]..[\x4A\x45\x53][\x46\x78\x50][\x49\x69][\x46\x66]/
event "image/jpeg"
}
signature matchfile-tiff {
http-reply-body /\x4D\x4D\x00[\x2A\x2B]/
event "image/tiff"
}
signature matchfile-png {
http-reply-body /\x89\x50\x4e\x47/
event "image/png"
}
signature matchfile-zip {
http-reply-body /\x50\x4B\x03\x04/
event "application/zip"
}
signature matchfile-bzip {
http-reply-body /\x42\x5A\x68/
event "application/bzip2"
}
signature matchfile-gzip {
http-reply-body /\x1F\x8B\x08/
event "application/x-gzip"
}
signature matchfile-cab {
http-reply-body /\x4D\x53\x43\x46/
event "application/vnd.ms-cab-compressed"
}
signature matchfile-rar {
http-reply-body /\x52\x61\x72\x21\x1A\x07\x00/
event "application/x-rar-compressed"
}
signature matchfile-7z {
http-reply-body /\x37\x7A\xBC\xAF\x27\x1C/
event "application/x-7z-compressed"
}

Some files were not shown because too many files have changed in this diff Show more