mirror of
https://github.com/zeek/zeek.git
synced 2025-10-04 07:38:19 +00:00
FileAnalysis: libmagic tweaks.
Remove verbose file type detection and automatically strip out charset from mime type.
This commit is contained in:
parent
2fba37e277
commit
e81f2ae7b0
23 changed files with 27 additions and 70 deletions
|
@ -72,11 +72,6 @@ export {
|
|||
## inspection in *bof_buffer* field.
|
||||
bof_buffer_size: count &log &optional;
|
||||
|
||||
## A file type provided by libmagic against the *bof_buffer*, or
|
||||
## in the cases where no buffering of the beginning of file occurs,
|
||||
## an initial guess of the file type based on the first data seen.
|
||||
file_type: string &log &optional;
|
||||
|
||||
## A mime type provided by libmagic against the *bof_buffer*, or
|
||||
## in the cases where no buffering of the beginning of file occurs,
|
||||
## an initial guess of the mime type based on the first data seen.
|
||||
|
@ -249,7 +244,6 @@ function set_info(f: fa_file)
|
|||
f$info$overflow_bytes = f$overflow_bytes;
|
||||
f$info$timeout_interval = f$timeout_interval;
|
||||
f$info$bof_buffer_size = f$bof_buffer_size;
|
||||
if ( f?$file_type ) f$info$file_type = f$file_type;
|
||||
if ( f?$mime_type ) f$info$mime_type = f$mime_type;
|
||||
if ( f?$conns )
|
||||
for ( cid in f$conns )
|
||||
|
|
|
@ -374,11 +374,6 @@ type fa_file: record {
|
|||
## This is also the buffer that's used for file/mime type detection.
|
||||
bof_buffer: string &optional;
|
||||
|
||||
## A file type provided by libmagic against the *bof_buffer*, or
|
||||
## in the cases where no buffering of the beginning of file occurs,
|
||||
## an initial guess of the file type based on the first data seen.
|
||||
file_type: string &optional;
|
||||
|
||||
## A mime type provided by libmagic against the *bof_buffer*, or
|
||||
## in the cases where no buffering of the beginning of file occurs,
|
||||
## an initial guess of the mime type based on the first data seen.
|
||||
|
|
|
@ -49,7 +49,7 @@ event file_new(f: fa_file) &priority=5
|
|||
|
||||
c$http$mime_type = f$mime_type;
|
||||
|
||||
local mime_str: string = split1(f$mime_type, /;/)[1];
|
||||
local mime_str: string = c$http$mime_type;
|
||||
|
||||
if ( mime_str !in mime_types_extensions ) next;
|
||||
if ( ! c$http?$uri ) next;
|
||||
|
|
|
@ -45,10 +45,8 @@ int File::overflow_bytes_idx = -1;
|
|||
int File::timeout_interval_idx = -1;
|
||||
int File::bof_buffer_size_idx = -1;
|
||||
int File::bof_buffer_idx = -1;
|
||||
int File::file_type_idx = -1;
|
||||
int File::mime_type_idx = -1;
|
||||
|
||||
magic_t File::magic = 0;
|
||||
magic_t File::magic_mime = 0;
|
||||
|
||||
string File::salt;
|
||||
|
@ -69,10 +67,8 @@ void File::StaticInit()
|
|||
timeout_interval_idx = Idx("timeout_interval");
|
||||
bof_buffer_size_idx = Idx("bof_buffer_size");
|
||||
bof_buffer_idx = Idx("bof_buffer");
|
||||
file_type_idx = Idx("file_type");
|
||||
mime_type_idx = Idx("mime_type");
|
||||
|
||||
bro_init_magic(&magic, MAGIC_NONE);
|
||||
bro_init_magic(&magic_mime, MAGIC_MIME);
|
||||
|
||||
salt = BifConst::FileAnalysis::salt->CheckString();
|
||||
|
@ -247,18 +243,22 @@ bool File::BufferBOF(const u_char* data, uint64 len)
|
|||
return true;
|
||||
}
|
||||
|
||||
bool File::DetectTypes(const u_char* data, uint64 len)
|
||||
bool File::DetectMIME(const u_char* data, uint64 len)
|
||||
{
|
||||
const char* desc = bro_magic_buffer(magic, data, len);
|
||||
const char* mime = bro_magic_buffer(magic_mime, data, len);
|
||||
|
||||
if ( desc )
|
||||
val->Assign(file_type_idx, new StringVal(desc));
|
||||
|
||||
if ( mime )
|
||||
val->Assign(mime_type_idx, new StringVal(mime));
|
||||
{
|
||||
const char* mime_end = strchr(mime, ';');
|
||||
|
||||
return desc || mime;
|
||||
if ( mime_end )
|
||||
// strip off charset
|
||||
val->Assign(mime_type_idx, new StringVal(mime_end - mime, mime));
|
||||
else
|
||||
val->Assign(mime_type_idx, new StringVal(mime));
|
||||
}
|
||||
|
||||
return mime;
|
||||
}
|
||||
|
||||
void File::ReplayBOF()
|
||||
|
@ -276,7 +276,7 @@ void File::ReplayBOF()
|
|||
BroString* bs = concatenate(bof_buffer.chunks);
|
||||
val->Assign(bof_buffer_idx, new StringVal(bs));
|
||||
|
||||
DetectTypes(bs->Bytes(), bs->Len());
|
||||
DetectMIME(bs->Bytes(), bs->Len());
|
||||
|
||||
FileEvent(file_new);
|
||||
|
||||
|
@ -291,7 +291,7 @@ void File::DataIn(const u_char* data, uint64 len, uint64 offset)
|
|||
if ( first_chunk )
|
||||
{
|
||||
// TODO: this should all really be delayed until we attempt reassembly
|
||||
DetectTypes(data, len);
|
||||
DetectMIME(data, len);
|
||||
FileEvent(file_new);
|
||||
first_chunk = false;
|
||||
}
|
||||
|
@ -326,7 +326,7 @@ void File::DataIn(const u_char* data, uint64 len)
|
|||
|
||||
if ( missed_bof )
|
||||
{
|
||||
DetectTypes(data, len);
|
||||
DetectMIME(data, len);
|
||||
FileEvent(file_new);
|
||||
missed_bof = false;
|
||||
}
|
||||
|
|
|
@ -170,11 +170,11 @@ protected:
|
|||
void ReplayBOF();
|
||||
|
||||
/**
|
||||
* Does file/mime type detection and assigns types (if available) to
|
||||
* corresponding fields in #val.
|
||||
* @return whether a file or mime type was available.
|
||||
* Does mime type detection and assigns type (if available) to \c mime_type
|
||||
* field in #val.
|
||||
* @return whether mime type was available.
|
||||
*/
|
||||
bool DetectTypes(const u_char* data, uint64 len);
|
||||
bool DetectMIME(const u_char* data, uint64 len);
|
||||
|
||||
FileID id; /**< A pretty hash that likely identifies file */
|
||||
string unique; /**< A string that uniquely identifies file */
|
||||
|
@ -207,7 +207,6 @@ protected:
|
|||
*/
|
||||
static void StaticInit();
|
||||
|
||||
static magic_t magic;
|
||||
static magic_t magic_mime;
|
||||
|
||||
static string salt;
|
||||
|
@ -224,7 +223,6 @@ protected:
|
|||
static int timeout_interval_idx;
|
||||
static int bof_buffer_size_idx;
|
||||
static int bof_buffer_idx;
|
||||
static int file_type_idx;
|
||||
static int mime_type_idx;
|
||||
};
|
||||
|
||||
|
|
|
@ -3,7 +3,6 @@ Cx92a0ym5R8, 0, 0
|
|||
FILE_BOF_BUFFER
|
||||
^J0.26 | 201
|
||||
FILE_TYPE
|
||||
file type is set
|
||||
mime type is set
|
||||
file_stream, Cx92a0ym5R8, 1500, ^J0.26 | 2012-08-24 15:10:04 -0700^J^J * Fixing update-changes, which could pick the wrong control file. (Robin Sommer)^J^J * Fixing GPG signing script. (Robin Sommer)^J^J0.25 | 2012-08-01 13:55:46 -0500^J^J * Fix configure script to exit with non-zero status on error (Jon Siwek)^J^J0.24 | 2012-07-05 12:50:43 -0700^J^J * Raise minimum required CMake version to 2.6.3 (Jon Siwek)^J^J * Adding script to delete old fully-merged branches. (Robin Sommer)^J^J0.23-2 | 2012-01-25 13:24:01 -0800^J^J * Fix a bro-cut error message. (Daniel Thayer)^J^J0.23 | 2012-01-11 12:16:11 -0800^J^J * Tweaks to release scripts, plus a new one for signing files.^J (Robin Sommer)^J^J0.22 | 2012-01-10 16:45:19 -0800^J^J * Tweaks for OpenBSD support. (Jon Siwek)^J^J * bro-cut extensions and fixes. (Robin Sommer)^J ^J - If no field names are given on the command line, we now pass through^J all fields. Adresses #657.^J^J - Removing some GNUism from awk script. Addresses #653.^J^J - Added option for time output in UTC. Addresses #668.^J^J - Added output field separator option -F. Addresses #649.^J^J - Fixing option -c: only some header lines were passed through^J rather than all. (Robin Sommer)^J^J * Fix parallel make portability. (Jon Siwek)^J^J0.21-9 | 2011-11-07 05:44:14 -0800^J^J * Fixing compiler warnings. Addresses #388. (Jon Siwek)^J^J0.21-2 | 2011-11-02 18:12:13 -0700^J^J * Fix for misnaming temp file in update-changes script. (Robin Sommer)^J^J0.21-1 | 2011-11-02 18:10:39 -0700^J^J * Little fix for make-relea
|
||||
file_chunk, Cx92a0ym5R8, 1500, 0, ^J0.26 | 2012-08-24 15:10:04 -0700^J^J * Fixing update-changes, which could pick the wrong control file. (Robin Sommer)^J^J * Fixing GPG signing script. (Robin Sommer)^J^J0.25 | 2012-08-01 13:55:46 -0500^J^J * Fix configure script to exit with non-zero status on error (Jon Siwek)^J^J0.24 | 2012-07-05 12:50:43 -0700^J^J * Raise minimum required CMake version to 2.6.3 (Jon Siwek)^J^J * Adding script to delete old fully-merged branches. (Robin Sommer)^J^J0.23-2 | 2012-01-25 13:24:01 -0800^J^J * Fix a bro-cut error message. (Daniel Thayer)^J^J0.23 | 2012-01-11 12:16:11 -0800^J^J * Tweaks to release scripts, plus a new one for signing files.^J (Robin Sommer)^J^J0.22 | 2012-01-10 16:45:19 -0800^J^J * Tweaks for OpenBSD support. (Jon Siwek)^J^J * bro-cut extensions and fixes. (Robin Sommer)^J ^J - If no field names are given on the command line, we now pass through^J all fields. Adresses #657.^J^J - Removing some GNUism from awk script. Addresses #653.^J^J - Added option for time output in UTC. Addresses #668.^J^J - Added output field separator option -F. Addresses #649.^J^J - Fixing option -c: only some header lines were passed through^J rather than all. (Robin Sommer)^J^J * Fix parallel make portability. (Jon Siwek)^J^J0.21-9 | 2011-11-07 05:44:14 -0800^J^J * Fixing compiler warnings. Addresses #388. (Jon Siwek)^J^J0.21-2 | 2011-11-02 18:12:13 -0700^J^J * Fix for misnaming temp file in update-changes script. (Robin Sommer)^J^J0.21-1 | 2011-11-02 18:10:39 -0700^J^J * Little fix for make-relea
|
||||
|
|
|
@ -1,7 +1,6 @@
|
|||
FILE_NEW
|
||||
oDwT1BbzjM1, 0, 0
|
||||
FILE_TYPE
|
||||
file type is set
|
||||
mime type is set
|
||||
FILE_STATE_REMOVE
|
||||
oDwT1BbzjM1, 1022920, 0
|
||||
|
@ -11,7 +10,6 @@ source: HTTP
|
|||
FILE_NEW
|
||||
oDwT1BbzjM1, 0, 0
|
||||
FILE_TYPE
|
||||
file type is set
|
||||
mime type is set
|
||||
FILE_TIMEOUT
|
||||
FILE_TIMEOUT
|
||||
|
|
|
@ -3,7 +3,6 @@ Cx92a0ym5R8, 0, 0
|
|||
FILE_BOF_BUFFER
|
||||
^J0.26 | 201
|
||||
FILE_TYPE
|
||||
file type is set
|
||||
mime type is set
|
||||
FILE_STATE_REMOVE
|
||||
Cx92a0ym5R8, 4705, 0
|
||||
|
|
|
@ -3,5 +3,4 @@ Cx92a0ym5R8, 0, 0
|
|||
FILE_BOF_BUFFER
|
||||
^J0.26 | 201
|
||||
FILE_TYPE
|
||||
file type is set
|
||||
mime type is set
|
||||
|
|
|
@ -3,7 +3,6 @@ sidhzrR4IT8, 0, 0
|
|||
FILE_BOF_BUFFER
|
||||
The Nationa
|
||||
FILE_TYPE
|
||||
file type is set
|
||||
mime type is set
|
||||
FILE_STATE_REMOVE
|
||||
sidhzrR4IT8, 16557, 0
|
||||
|
|
|
@ -3,7 +3,6 @@ kg59rqyYxN, 0, 0
|
|||
FILE_BOF_BUFFER
|
||||
{^J "origin
|
||||
FILE_TYPE
|
||||
file type is set
|
||||
mime type is set
|
||||
FILE_STATE_REMOVE
|
||||
kg59rqyYxN, 197, 0
|
||||
|
|
|
@ -3,7 +3,6 @@ Cx92a0ym5R8, 0, 0
|
|||
FILE_BOF_BUFFER
|
||||
^J0.26 | 201
|
||||
FILE_TYPE
|
||||
file type is set
|
||||
mime type is set
|
||||
FILE_STATE_REMOVE
|
||||
Cx92a0ym5R8, 4705, 0
|
||||
|
|
|
@ -1,7 +1,6 @@
|
|||
FILE_NEW
|
||||
7gZBKVUgy4l, 0, 0
|
||||
FILE_TYPE
|
||||
file type is set
|
||||
mime type is set
|
||||
FILE_OVER_NEW_CONNECTION
|
||||
FILE_STATE_REMOVE
|
||||
|
|
|
@ -1,7 +1,6 @@
|
|||
FILE_NEW
|
||||
oDwT1BbzjM1, 0, 0
|
||||
FILE_TYPE
|
||||
file type is set
|
||||
mime type is set
|
||||
FILE_STATE_REMOVE
|
||||
oDwT1BbzjM1, 1022920, 0
|
||||
|
@ -11,7 +10,6 @@ source: HTTP
|
|||
FILE_NEW
|
||||
oDwT1BbzjM1, 0, 0
|
||||
FILE_TYPE
|
||||
file type is set
|
||||
mime type is set
|
||||
FILE_TIMEOUT
|
||||
FILE_STATE_REMOVE
|
||||
|
|
|
@ -1,7 +1,6 @@
|
|||
FILE_NEW
|
||||
uHS14uhRKGe, 0, 0
|
||||
FILE_TYPE
|
||||
file type is set
|
||||
mime type is set
|
||||
FILE_OVER_NEW_CONNECTION
|
||||
FILE_STATE_REMOVE
|
||||
|
|
|
@ -3,7 +3,6 @@ aFQKI8SPOL2, 0, 0
|
|||
FILE_BOF_BUFFER
|
||||
/*^J********
|
||||
FILE_TYPE
|
||||
file type is set
|
||||
mime type is set
|
||||
FILE_STATE_REMOVE
|
||||
aFQKI8SPOL2, 2675, 0
|
||||
|
@ -17,7 +16,6 @@ CCU3vUEr06l, 0, 0
|
|||
FILE_BOF_BUFFER
|
||||
//-- Google
|
||||
FILE_TYPE
|
||||
file type is set
|
||||
mime type is set
|
||||
FILE_STATE_REMOVE
|
||||
CCU3vUEr06l, 21421, 0
|
||||
|
@ -31,7 +29,6 @@ HCzA0dVwDPj, 0, 0
|
|||
FILE_BOF_BUFFER
|
||||
GIF89a^D\0^D\0\xb3
|
||||
FILE_TYPE
|
||||
file type is set
|
||||
mime type is set
|
||||
FILE_STATE_REMOVE
|
||||
HCzA0dVwDPj, 94, 0
|
||||
|
@ -46,7 +43,6 @@ a1Zu1fteVEf, 0, 0
|
|||
FILE_BOF_BUFFER
|
||||
\x89PNG^M^J^Z^J\0\0\0
|
||||
FILE_TYPE
|
||||
file type is set
|
||||
mime type is set
|
||||
FILE_STATE_REMOVE
|
||||
a1Zu1fteVEf, 2349, 0
|
||||
|
@ -61,7 +57,6 @@ xXlF7wFdsR, 0, 0
|
|||
FILE_BOF_BUFFER
|
||||
\x89PNG^M^J^Z^J\0\0\0
|
||||
FILE_TYPE
|
||||
file type is set
|
||||
mime type is set
|
||||
FILE_STATE_REMOVE
|
||||
xXlF7wFdsR, 27579, 0
|
||||
|
|
|
@ -3,7 +3,6 @@ v5HLI7MxPQh, 0, 0
|
|||
FILE_BOF_BUFFER
|
||||
hello world
|
||||
FILE_TYPE
|
||||
file type is set
|
||||
mime type is set
|
||||
FILE_STATE_REMOVE
|
||||
v5HLI7MxPQh, 11, 0
|
||||
|
@ -18,7 +17,6 @@ PZS1XGHkIf1, 0, 0
|
|||
FILE_BOF_BUFFER
|
||||
{^J "origin
|
||||
FILE_TYPE
|
||||
file type is set
|
||||
mime type is set
|
||||
FILE_STATE_REMOVE
|
||||
PZS1XGHkIf1, 366, 0
|
||||
|
|
|
@ -3,7 +3,6 @@ nYgPNGLrZf9, 0, 0
|
|||
FILE_BOF_BUFFER
|
||||
#separator
|
||||
FILE_TYPE
|
||||
file type is set
|
||||
mime type is set
|
||||
FILE_STATE_REMOVE
|
||||
nYgPNGLrZf9, 311, 0
|
||||
|
|
|
@ -3,7 +3,6 @@ wqKMAamJVSb, 0, 0
|
|||
FILE_BOF_BUFFER
|
||||
PK^C^D^T\0\0\0^H\0\xae
|
||||
FILE_TYPE
|
||||
file type is set
|
||||
mime type is set
|
||||
FILE_STATE_REMOVE
|
||||
wqKMAamJVSb, 42208, 0
|
||||
|
|
|
@ -3,8 +3,8 @@
|
|||
#empty_field (empty)
|
||||
#unset_field -
|
||||
#path file_analysis
|
||||
#open 2013-04-04-21-22-26
|
||||
#fields id parent_id source last_active seen_bytes total_bytes missing_bytes overflow_bytes timeout_interval bof_buffer_size file_type mime_type timedout conn_uids actions_taken extracted_files md5 sha1 sha256
|
||||
#types string string string time count count count count interval count string string bool table[string] table[enum] table[string] string string string
|
||||
Cx92a0ym5R8 - HTTP 1362692527.009775 4705 4705 0 0 120.000000 1024 set set F UWkUyAuUGXf FileAnalysis::ACTION_SHA1,FileAnalysis::ACTION_EXTRACT,FileAnalysis::ACTION_DATA_EVENT,FileAnalysis::ACTION_MD5,FileAnalysis::ACTION_SHA256 Cx92a0ym5R8-file 397168fd09991a0e712254df7bc639ac 1dd7ac0398df6cbc0696445a91ec681facf4dc47 4e7c7ef0984119447e743e3ec77e1de52713e345cde03fe7df753a35849bed18
|
||||
#close 2013-04-04-21-22-26
|
||||
#open 2013-04-11-17-29-51
|
||||
#fields id parent_id source last_active seen_bytes total_bytes missing_bytes overflow_bytes timeout_interval bof_buffer_size mime_type timedout conn_uids actions_taken extracted_files md5 sha1 sha256
|
||||
#types string string string time count count count count interval count string bool table[string] table[enum] table[string] string string string
|
||||
Cx92a0ym5R8 - HTTP 1362692527.009775 4705 4705 0 0 120.000000 1024 set F UWkUyAuUGXf FileAnalysis::ACTION_SHA1,FileAnalysis::ACTION_EXTRACT,FileAnalysis::ACTION_DATA_EVENT,FileAnalysis::ACTION_MD5,FileAnalysis::ACTION_SHA256 Cx92a0ym5R8-file 397168fd09991a0e712254df7bc639ac 1dd7ac0398df6cbc0696445a91ec681facf4dc47 4e7c7ef0984119447e743e3ec77e1de52713e345cde03fe7df753a35849bed18
|
||||
#close 2013-04-11-17-29-51
|
||||
|
|
|
@ -3,7 +3,6 @@ cwR7l6Zctxb, 0, 0
|
|||
FILE_BOF_BUFFER
|
||||
Hello^M^J^M^J ^M
|
||||
FILE_TYPE
|
||||
file type is set
|
||||
mime type is set
|
||||
FILE_STATE_REMOVE
|
||||
cwR7l6Zctxb, 79, 0
|
||||
|
@ -17,7 +16,6 @@ ZAOEQmRyxv1, 0, 0
|
|||
FILE_BOF_BUFFER
|
||||
<html xmlns
|
||||
FILE_TYPE
|
||||
file type is set
|
||||
mime type is set
|
||||
FILE_STATE_REMOVE
|
||||
ZAOEQmRyxv1, 1918, 0
|
||||
|
@ -31,7 +29,6 @@ Ltd7QO7jEv3, 0, 0
|
|||
FILE_BOF_BUFFER
|
||||
Version 4.9
|
||||
FILE_TYPE
|
||||
file type is set
|
||||
mime type is set
|
||||
FILE_STATE_REMOVE
|
||||
Ltd7QO7jEv3, 10823, 0
|
||||
|
|
|
@ -10,7 +10,7 @@ BEGIN { FS="\t"; OFS="\t"; type_col = -1; desc_col = -1 }
|
|||
{
|
||||
if ( $i == "mime_type" )
|
||||
type_col = i-1;
|
||||
if ( $i == "mime_desc" || $i == "file_type" )
|
||||
if ( $i == "mime_desc" )
|
||||
desc_col = i-1;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -47,17 +47,11 @@ event file_new(f: fa_file)
|
|||
print f$bof_buffer[0:10];
|
||||
}
|
||||
|
||||
if ( f?$file_type || f?$mime_type )
|
||||
print "FILE_TYPE";
|
||||
# not actually printing the values due to libmagic variances
|
||||
if ( f?$file_type )
|
||||
{
|
||||
print "file type is set";
|
||||
f$file_type = "set";
|
||||
}
|
||||
if ( f?$mime_type )
|
||||
{
|
||||
print "FILE_TYPE";
|
||||
print "mime type is set";
|
||||
# not actually printing the values due to libmagic variances
|
||||
f$mime_type = "set";
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue