mirror of
https://github.com/zeek/zeek.git
synced 2025-10-02 14:48:21 +00:00
Various minor changes related to file mime type detection.
- Improve or just remove some file magic signatures ported from libmagic that were too general and matched incorrectly too often.
- Fix the MHR script's use of fa_file$mime_type before checking whether it is initialized. It may be uninitialized if no signatures match.
- The "fa_file" record now contains a "mime_types" field that holds all magic signatures that matched the file content (the "mime_type" field is just a shortcut for the strongest match).
This commit is contained in:
parent
0865b152bb
commit
095a68b2ec
15 changed files with 187 additions and 143 deletions
|
@ -569,7 +569,7 @@ signature file-magic-auto87 {
|
|||
# >>>>&0 search/1024,=\n (len=1), [""], swap_endian=0
|
||||
# >>>>>&0 search/1,=@@ (len=2), ["unified diff output text"], swap_endian=0
|
||||
signature file-magic-auto88 {
|
||||
file-mime "text/x-diff", 40
|
||||
file-mime "text/x-diff", 55
|
||||
file-magic /(.*)(\x2d\x2d\x2d )(.*)(\x0a)(.*)(\x2b\x2b\x2b )(.*)(\x0a)(.*)(\x40\x40)/
|
||||
}
|
||||
|
||||
|
@ -2643,7 +2643,7 @@ signature file-magic-auto388 {
|
|||
# >>&0 regex,= {0,50}\(([a-zA-Z]|,| ){1,500}\):$ (len=34), ["Python script text executable"], swap_endian=0
|
||||
signature file-magic-auto389 {
|
||||
file-mime "text/x-python", 64
|
||||
file-magic /(^( |\t){0,50}def {1,50}[a-zA-Z]{1,100})( {0,50}\(([a-zA-Z]|,| ){1,500}\):$)/
|
||||
file-magic /(.*)(( |\t){0,50}def {1,50}[a-zA-Z]{1,100})( {0,50}\(([a-zA-Z]|,| ){1,500}\):$)/
|
||||
}
|
||||
|
||||
# >0 search/4096,=\documentstyle (len=14), ["LaTeX document text"], swap_endian=0
|
||||
|
@ -2704,7 +2704,7 @@ signature file-magic-auto397 {
|
|||
# >>>0 regex,=^[ \t]*end([ \t]*[;#].*)?$ (len=24), ["Ruby script text"], swap_endian=0
|
||||
signature file-magic-auto398 {
|
||||
file-mime "text/x-ruby", 54
|
||||
file-magic /(^[ \x09]*require[ \x09]'[A-Za-z_\x2f]+')(include [A-Z]|def [a-z]| do$)(^[ \x09]*end([ \x09]*[;#].*)?$)/
|
||||
file-magic /(.*)([ \x09]*require[ \x09]'[A-Za-z_\x2f]+')(include [A-Z]|def [a-z]| do$)(^[ \x09]*end([ \x09]*[;#].*)?$)/
|
||||
}
|
||||
|
||||
# >0 search/1,=eval "exec /usr/local/bin/perl (len=30), ["Perl script text"], swap_endian=0
|
||||
|
@ -2760,7 +2760,7 @@ signature file-magic-auto406 {
|
|||
# >>>0 regex,=^[ \t]*end([ \t]*[;#].*)?$ (len=24), ["Ruby module source text"], swap_endian=0
|
||||
signature file-magic-auto407 {
|
||||
file-mime "text/x-ruby", 54
|
||||
file-magic /(^[ \x09]*(class|module)[ \x09][A-Z])((modul|includ)e [A-Z]|def [a-z])(^[ \x09]*end([ \x09]*[;#].*)?$)/
|
||||
file-magic /(.*)([ \x09]*(class|module)[ \x09][A-Z])((modul|includ)e [A-Z]|def [a-z])(^[ \x09]*end([ \x09]*[;#].*)?$)/
|
||||
}
|
||||
|
||||
# >512 string/b,=\354\245\301 (len=3), ["Microsoft Word Document"], swap_endian=0
|
||||
|
@ -2797,7 +2797,7 @@ signature file-magic-auto412 {
|
|||
# >0 regex,=^from\s+(\w|\.)+\s+import.*$ (len=28), ["Python script text executable"], swap_endian=0
|
||||
signature file-magic-auto413 {
|
||||
file-mime "text/x-python", 58
|
||||
file-magic /(^from\s+(\w|\.)+\s+import.*$)/
|
||||
file-magic /(.*)(from\s+(\w|\.)+\s+import.*$)/
|
||||
}
|
||||
|
||||
# >0 search/4096,=\contentsline (len=13), ["LaTeX table of contents"], swap_endian=0
|
||||
|
@ -3342,11 +3342,12 @@ signature file-magic-auto497 {
|
|||
file-magic /(.{4})(jP)/
|
||||
}
|
||||
|
||||
# Not specific enough.
|
||||
# >0 regex,=^template[ \t\n]+ (len=15), ["C++ source text"], swap_endian=0
|
||||
signature file-magic-auto498 {
|
||||
file-mime "text/x-c++", 50
|
||||
file-magic /(^template[ \x09\x0a]+)/
|
||||
}
|
||||
#signature file-magic-auto498 {
|
||||
# file-mime "text/x-c++", 50
|
||||
# file-magic /(.*)(template[ \x09\x0a]+)/
|
||||
#}
|
||||
|
||||
# >0 search/c/1,=<?php (len=5), ["PHP script text"], swap_endian=0
|
||||
signature file-magic-auto499 {
|
||||
|
@ -3417,9 +3418,46 @@ signature file-magic-auto509 {
|
|||
# >0 regex,=^[ \t]{0,50}\.asciiz (len=19), ["assembler source text"], swap_endian=0
|
||||
signature file-magic-auto510 {
|
||||
file-mime "text/x-asm", 49
|
||||
file-magic /(^[ \x09]{0,50}\.asciiz)/
|
||||
file-magic /(^[ \x09]{0,50}\.(asciiz|asciz|section|globl|align|even|byte|file|type))/
|
||||
}
|
||||
|
||||
# >0 regex,=^[ \t]{0,50}\.globl (len=18), ["assembler source text"], swap_endian=0
|
||||
#signature file-magic-auto517 {
|
||||
# file-mime "text/x-asm", 48
|
||||
# file-magic /(^[ \x09]{0,50}\.globl)/
|
||||
#}
|
||||
|
||||
# >0 regex,=^[ \t]{0,50}\.text (len=17), ["assembler source text"], swap_endian=0
|
||||
#signature file-magic-auto523 {
|
||||
# file-mime "text/x-asm", 47
|
||||
# file-magic /(^[ \x09]{0,50}\.text)/
|
||||
#}
|
||||
|
||||
# >0 regex,=^[ \t]{0,50}\.even (len=17), ["assembler source text"], swap_endian=0
|
||||
#signature file-magic-auto524 {
|
||||
# file-mime "text/x-asm", 47
|
||||
# file-magic /(^[ \x09]{0,50}\.even)/
|
||||
#}
|
||||
|
||||
# >0 regex,=^[ \t]{0,50}\.byte (len=17), ["assembler source text"], swap_endian=0
|
||||
#signature file-magic-auto525 {
|
||||
# file-mime "text/x-asm", 47
|
||||
# file-magic /(^[ \x09]{0,50}\.byte)/
|
||||
#}
|
||||
|
||||
# >0 regex,=^[ \t]{0,50}\.file (len=17), ["assembler source text"], swap_endian=0
|
||||
#signature file-magic-auto526 {
|
||||
# file-mime "text/x-asm", 47
|
||||
# file-magic /(^[ \x09]{0,50}\.file)/
|
||||
#}
|
||||
|
||||
# >0 regex,=^[ \t]{0,50}\.type (len=17), ["assembler source text"], swap_endian=0
|
||||
#signature file-magic-auto527 {
|
||||
# file-mime "text/x-asm", 47
|
||||
# file-magic /(^[ \x09]{0,50}\.type)/
|
||||
#}
|
||||
|
||||
|
||||
# >0 search/1,=#!/usr/bin/env perl (len=19), ["Perl script text executable"], swap_endian=0
|
||||
signature file-magic-auto511 {
|
||||
file-mime "text/x-perl", 49
|
||||
|
@ -3432,11 +3470,12 @@ signature file-magic-auto512 {
|
|||
file-magic /(.*)(\x3c\x21[dD][oO][cC][tT][yY][pP][eE] {1,}[hH][tT][mM][lL])/
|
||||
}
|
||||
|
||||
# This doesn't seem specific enough.
|
||||
# >0 regex,=^virtual[ \t\n]+ (len=14), ["C++ source text"], swap_endian=0
|
||||
signature file-magic-auto513 {
|
||||
file-mime "text/x-c++", 49
|
||||
file-magic /(^virtual[ \x09\x0a]+)/
|
||||
}
|
||||
#signature file-magic-auto513 {
|
||||
# file-mime "text/x-c++", 49
|
||||
# file-magic /(.*)(virtual[ \x09\x0a]+)/
|
||||
#}
|
||||
|
||||
# >0 search/1,=#! /usr/bin/env lua (len=19), ["Lua script text executable"], swap_endian=0
|
||||
signature file-magic-auto514 {
|
||||
|
@ -3455,13 +3494,6 @@ signature file-magic-auto516 {
|
|||
file-mime "text/x-tcl", 49
|
||||
file-magic /(.*)(\x23\x21 \x2fusr\x2fbin\x2fenv tcl)/
|
||||
}
|
||||
|
||||
# >0 regex,=^[ \t]{0,50}\.globl (len=18), ["assembler source text"], swap_endian=0
|
||||
signature file-magic-auto517 {
|
||||
file-mime "text/x-asm", 48
|
||||
file-magic /(^[ \x09]{0,50}\.globl)/
|
||||
}
|
||||
|
||||
# >0 search/1,=#!/usr/bin/env tcl (len=18), ["Tcl script text executable"], swap_endian=0
|
||||
signature file-magic-auto518 {
|
||||
file-mime "text/x-tcl", 48
|
||||
|
@ -3489,37 +3521,7 @@ signature file-magic-auto521 {
|
|||
# >0 regex,=^class[ \t\n]+ (len=12), ["C++ source text"], swap_endian=0
|
||||
signature file-magic-auto522 {
|
||||
file-mime "text/x-c++", 47
|
||||
file-magic /(^class[ \x09\x0a]+)/
|
||||
}
|
||||
|
||||
# >0 regex,=^[ \t]{0,50}\.text (len=17), ["assembler source text"], swap_endian=0
|
||||
signature file-magic-auto523 {
|
||||
file-mime "text/x-asm", 47
|
||||
file-magic /(^[ \x09]{0,50}\.text)/
|
||||
}
|
||||
|
||||
# >0 regex,=^[ \t]{0,50}\.even (len=17), ["assembler source text"], swap_endian=0
|
||||
signature file-magic-auto524 {
|
||||
file-mime "text/x-asm", 47
|
||||
file-magic /(^[ \x09]{0,50}\.even)/
|
||||
}
|
||||
|
||||
# >0 regex,=^[ \t]{0,50}\.byte (len=17), ["assembler source text"], swap_endian=0
|
||||
signature file-magic-auto525 {
|
||||
file-mime "text/x-asm", 47
|
||||
file-magic /(^[ \x09]{0,50}\.byte)/
|
||||
}
|
||||
|
||||
# >0 regex,=^[ \t]{0,50}\.file (len=17), ["assembler source text"], swap_endian=0
|
||||
signature file-magic-auto526 {
|
||||
file-mime "text/x-asm", 47
|
||||
file-magic /(^[ \x09]{0,50}\.file)/
|
||||
}
|
||||
|
||||
# >0 regex,=^[ \t]{0,50}\.type (len=17), ["assembler source text"], swap_endian=0
|
||||
signature file-magic-auto527 {
|
||||
file-mime "text/x-asm", 47
|
||||
file-magic /(^[ \x09]{0,50}\.type)/
|
||||
file-magic /(.*)(class[ \x09\x0a]+[[:alnum:]_]+)(.*)(\x7b)(.*)(public:)/
|
||||
}
|
||||
|
||||
# >0 search/1,=This is Info file (len=17), ["GNU Info text"], swap_endian=0
|
||||
|
@ -3717,11 +3719,12 @@ signature file-magic-auto553 {
|
|||
file-magic /(.*)(\x5cinput texinfo)/
|
||||
}
|
||||
|
||||
# Not specific enough.
|
||||
# >0 regex,=^private: (len=9), ["C++ source text"], swap_endian=0
|
||||
signature file-magic-auto554 {
|
||||
file-mime "text/x-c++", 44
|
||||
file-magic /(^private:)/
|
||||
}
|
||||
#signature file-magic-auto554 {
|
||||
# file-mime "text/x-c++", 44
|
||||
# file-magic /(.*)(private:)/
|
||||
#}
|
||||
|
||||
# >0 search/4096,=def __init__ (len=12), [""], swap_endian=0
|
||||
# >>&0 search/64,=self (len=4), ["Python script text executable"], swap_endian=0
|
||||
|
@ -3739,7 +3742,7 @@ signature file-magic-auto556 {
|
|||
# >0 regex,=^extern[ \t\n]+ (len=13), ["C source text"], swap_endian=0
|
||||
signature file-magic-auto557 {
|
||||
file-mime "text/x-c", 43
|
||||
file-magic /(^extern[ \x09\x0a]+)/
|
||||
file-magic /(.*)(extern[ \x09\x0a]+)/
|
||||
}
|
||||
|
||||
# >0 search/4096,=% -*-latex-*- (len=13), ["LaTeX document text"], swap_endian=0
|
||||
|
@ -3748,16 +3751,17 @@ signature file-magic-auto558 {
|
|||
file-magic /(.*)(\x25 \x2d\x2a\x2dlatex\x2d\x2a\x2d)/
|
||||
}
|
||||
|
||||
# Doesn't seem specific enough.
|
||||
# >0 regex,=^double[ \t\n]+ (len=13), ["C source text"], swap_endian=0
|
||||
signature file-magic-auto559 {
|
||||
file-mime "text/x-c", 43
|
||||
file-magic /(^double[ \x09\x0a]+)/
|
||||
}
|
||||
#signature file-magic-auto559 {
|
||||
# file-mime "text/x-c", 43
|
||||
# file-magic /(^double[ \x09\x0a]+)/
|
||||
#}
|
||||
|
||||
# >0 regex,=^struct[ \t\n]+ (len=13), ["C source text"], swap_endian=0
|
||||
signature file-magic-auto560 {
|
||||
file-mime "text/x-c", 43
|
||||
file-magic /(^struct[ \x09\x0a]+)/
|
||||
file-magic /(.*)(struct[ \x09\x0a]+)/
|
||||
}
|
||||
|
||||
# >0 search/w/1,=#!/bin/nodejs (len=13), ["Node.js script text executable"], swap_endian=0
|
||||
|
@ -3766,11 +3770,12 @@ signature file-magic-auto561 {
|
|||
file-magic /(.*)(\x23\x21\x2fbin\x2fnodejs)/
|
||||
}
|
||||
|
||||
# Not specific enough.
|
||||
# >0 regex,=^public: (len=8), ["C++ source text"], swap_endian=0
|
||||
signature file-magic-auto562 {
|
||||
file-mime "text/x-c++", 43
|
||||
file-magic /(^public:)/
|
||||
}
|
||||
#signature file-magic-auto562 {
|
||||
# file-mime "text/x-c++", 43
|
||||
# file-magic /(.*)(public:)/
|
||||
#}
|
||||
|
||||
# >0 search/wct/4096,=<script (len=7), ["HTML document text"], swap_endian=0
|
||||
signature file-magic-auto563 {
|
||||
|
@ -3778,17 +3783,19 @@ signature file-magic-auto563 {
|
|||
file-magic /(.*)(\x3c[sS][cC][rR][iI][pP][tT])/
|
||||
}
|
||||
|
||||
# Doesn't seem specific enough.
|
||||
# >0 regex,=^float[ \t\n]+ (len=12), ["C source text"], swap_endian=0
|
||||
signature file-magic-auto564 {
|
||||
file-mime "text/x-c", 42
|
||||
file-magic /(^float[ \x09\x0a]+)/
|
||||
}
|
||||
#signature file-magic-auto564 {
|
||||
# file-mime "text/x-c", 42
|
||||
# file-magic /(^float[ \x09\x0a]+)/
|
||||
#}
|
||||
|
||||
# Doesn't seem specific enough.
|
||||
# >0 regex,=^union[ \t\n]+ (len=12), ["C source text"], swap_endian=0
|
||||
signature file-magic-auto565 {
|
||||
file-mime "text/x-c", 42
|
||||
file-magic /(^union[ \x09\x0a]+)/
|
||||
}
|
||||
#signature file-magic-auto565 {
|
||||
# file-mime "text/x-c", 42
|
||||
# file-magic /(^union[ \x09\x0a]+)/
|
||||
#}
|
||||
|
||||
# The use of non-sequential offsets and relational operations made the
|
||||
# autogenerated signature incorrect.
|
||||
|
@ -3810,7 +3817,7 @@ signature file-magic-auto567 {
|
|||
# >0 regex,=^char[ \t\n]+ (len=11), ["C source text"], swap_endian=0
|
||||
signature file-magic-auto568 {
|
||||
file-mime "text/x-c", 41
|
||||
file-magic /(^char[ \x09\x0a]+)/
|
||||
file-magic /(.*)(char[ \x09\x0a]+)/
|
||||
}
|
||||
|
||||
# >0 search/1,=#! (len=2), [""], swap_endian=0
|
||||
|
@ -3911,11 +3918,12 @@ signature file-magic-auto581 {
|
|||
file-magic /(.*)(main\x28)/
|
||||
}
|
||||
|
||||
# Not specific enough.
|
||||
# >0 search/1,=\" (len=2), ["troff or preprocessor input text"], swap_endian=0
|
||||
signature file-magic-auto582 {
|
||||
file-mime "text/troff", 40
|
||||
file-magic /(.*)(\x5c\x22)/
|
||||
}
|
||||
#signature file-magic-auto582 {
|
||||
# file-mime "text/troff", 40
|
||||
# file-magic /(.*)(\x5c\x22)/
|
||||
#}
|
||||
|
||||
# >0 search/4096,=(defparam (len=10), ["Lisp/Scheme program text"], swap_endian=0
|
||||
signature file-magic-auto583 {
|
||||
|
@ -3929,16 +3937,17 @@ signature file-magic-auto584 {
|
|||
file-magic /(.*)(\x28autoload )/
|
||||
}
|
||||
|
||||
# This signature seems too generic.
|
||||
# >0 search/1,=diff (len=5), ["diff output text"], swap_endian=0
|
||||
signature file-magic-auto585 {
|
||||
file-mime "text/x-diff", 40
|
||||
file-magic /(.*)(diff )/
|
||||
}
|
||||
#signature file-magic-auto585 {
|
||||
# file-mime "text/x-diff", 40
|
||||
# file-magic /(.*)(diff )/
|
||||
#}
|
||||
|
||||
# >0 regex,=^#include (len=9), ["C source text"], swap_endian=0
|
||||
signature file-magic-auto586 {
|
||||
file-mime "text/x-c", 39
|
||||
file-magic /(^#include)/
|
||||
file-magic /(.*)(#include)/
|
||||
}
|
||||
|
||||
# >0 search/1,=.\" (len=3), ["troff or preprocessor input text"], swap_endian=0
|
||||
|
@ -4006,7 +4015,7 @@ signature file-magic-auto596 {
|
|||
# >0 regex,=^SUBDIRS (len=8), ["automake makefile script text"], swap_endian=0
|
||||
signature file-magic-auto597 {
|
||||
file-mime "text/x-makefile", 38
|
||||
file-magic /(^SUBDIRS)/
|
||||
file-magic /(.*)(SUBDIRS)/
|
||||
}
|
||||
|
||||
# >0 search/4096,=(defvar (len=8), ["Lisp/Scheme program text"], swap_endian=0
|
||||
|
@ -4015,11 +4024,12 @@ signature file-magic-auto598 {
|
|||
file-magic /(.*)(\x28defvar )/
|
||||
}
|
||||
|
||||
# Not specific enough.
|
||||
# >0 regex,=^program (len=8), ["Pascal source text"], swap_endian=0
|
||||
signature file-magic-auto599 {
|
||||
file-mime "text/x-pascal", 38
|
||||
file-magic /(^program)/
|
||||
}
|
||||
#signature file-magic-auto599 {
|
||||
# file-mime "text/x-pascal", 38
|
||||
# file-magic /(^program)/
|
||||
#}
|
||||
|
||||
# >0 search/1,=Only in (len=8), ["diff output text"], swap_endian=0
|
||||
signature file-magic-auto600 {
|
||||
|
@ -4027,11 +4037,12 @@ signature file-magic-auto600 {
|
|||
file-magic /(.*)(Only in )/
|
||||
}
|
||||
|
||||
# This signature doesn't seem specific enough.
|
||||
# >0 search/1,=*** (len=4), ["diff output text"], swap_endian=0
|
||||
signature file-magic-auto601 {
|
||||
file-mime "text/x-diff", 38
|
||||
file-magic /(.*)(\x2a\x2a\x2a )/
|
||||
}
|
||||
#signature file-magic-auto601 {
|
||||
# file-mime "text/x-diff", 38
|
||||
# file-magic /(.*)(\x2a\x2a\x2a )/
|
||||
#}
|
||||
|
||||
# >0 search/1,='.\" (len=4), ["troff or preprocessor input text"], swap_endian=0
|
||||
signature file-magic-auto602 {
|
||||
|
@ -4039,11 +4050,12 @@ signature file-magic-auto602 {
|
|||
file-magic /(.*)(\x27\x2e\x5c\x22)/
|
||||
}
|
||||
|
||||
# LDFLAGS appears in other contexts, e.g. shell script.
|
||||
# >0 regex,=^LDFLAGS (len=8), ["makefile script text"], swap_endian=0
|
||||
signature file-magic-auto603 {
|
||||
file-mime "text/x-makefile", 38
|
||||
file-magic /(^LDFLAGS)/
|
||||
}
|
||||
#signature file-magic-auto603 {
|
||||
# file-mime "text/x-makefile", 38
|
||||
# file-magic /(.*)(LDFLAGS)/
|
||||
#}
|
||||
|
||||
# >0 search/8192,="libhdr" (len=8), ["BCPL source text"], swap_endian=0
|
||||
signature file-magic-auto604 {
|
||||
|
@ -4051,16 +4063,17 @@ signature file-magic-auto604 {
|
|||
file-magic /(.*)(\x22libhdr\x22)/
|
||||
}
|
||||
|
||||
# Not specific enough.
|
||||
# >0 regex,=^record (len=7), ["Pascal source text"], swap_endian=0
|
||||
signature file-magic-auto605 {
|
||||
file-mime "text/x-pascal", 37
|
||||
file-magic /(^record)/
|
||||
}
|
||||
#signature file-magic-auto605 {
|
||||
# file-mime "text/x-pascal", 37
|
||||
# file-magic /(^record)/
|
||||
#}
|
||||
|
||||
# >0 regex,=^CFLAGS (len=7), ["makefile script text"], swap_endian=0
|
||||
signature file-magic-auto606 {
|
||||
file-mime "text/x-makefile", 37
|
||||
file-magic /(^CFLAGS)/
|
||||
file-magic /(.*)(CFLAGS)/
|
||||
}
|
||||
|
||||
# >0 search/4096,=(defun (len=7), ["Lisp/Scheme program text"], swap_endian=0
|
||||
|
@ -4081,11 +4094,12 @@ signature file-magic-auto609 {
|
|||
file-magic /(.*)(\x28input\x2c)/
|
||||
}
|
||||
|
||||
# Not specific enough.
|
||||
# >0 search/1,=Index: (len=6), ["RCS/CVS diff output text"], swap_endian=0
|
||||
signature file-magic-auto610 {
|
||||
file-mime "text/x-diff", 36
|
||||
file-magic /(.*)(Index\x3a)/
|
||||
}
|
||||
#signature file-magic-auto610 {
|
||||
# file-mime "text/x-diff", 44
|
||||
# file-magic /(.*)(Index\x3a)/
|
||||
#}
|
||||
|
||||
# >0 search/4096,=(setq (len=6), ["Lisp/Scheme program text"], swap_endian=0
|
||||
signature file-magic-auto611 {
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue