FileAnalysis: libmagic tweaks.

Remove verbose file type detection and automatically strip out charset
from mime type.
This commit is contained in:
Jon Siwek 2013-04-11 13:11:46 -05:00
parent 2fba37e277
commit e81f2ae7b0
23 changed files with 27 additions and 70 deletions

View file

@ -72,11 +72,6 @@ export {
## inspection in *bof_buffer* field.
bof_buffer_size: count &log &optional;
## A file type provided by libmagic against the *bof_buffer*, or
## in the cases where no buffering of the beginning of file occurs,
## an initial guess of the file type based on the first data seen.
file_type: string &log &optional;
## A mime type provided by libmagic against the *bof_buffer*, or
## in the cases where no buffering of the beginning of file occurs,
## an initial guess of the mime type based on the first data seen.
@ -249,7 +244,6 @@ function set_info(f: fa_file)
f$info$overflow_bytes = f$overflow_bytes;
f$info$timeout_interval = f$timeout_interval;
f$info$bof_buffer_size = f$bof_buffer_size;
if ( f?$file_type ) f$info$file_type = f$file_type;
if ( f?$mime_type ) f$info$mime_type = f$mime_type;
if ( f?$conns )
for ( cid in f$conns )

View file

@ -374,11 +374,6 @@ type fa_file: record {
## This is also the buffer that's used for file/mime type detection.
bof_buffer: string &optional;
## A file type provided by libmagic against the *bof_buffer*, or
## in the cases where no buffering of the beginning of file occurs,
## an initial guess of the file type based on the first data seen.
file_type: string &optional;
## A mime type provided by libmagic against the *bof_buffer*, or
## in the cases where no buffering of the beginning of file occurs,
## an initial guess of the mime type based on the first data seen.

View file

@ -49,7 +49,7 @@ event file_new(f: fa_file) &priority=5
c$http$mime_type = f$mime_type;
local mime_str: string = split1(f$mime_type, /;/)[1];
local mime_str: string = c$http$mime_type;
if ( mime_str !in mime_types_extensions ) next;
if ( ! c$http?$uri ) next;

View file

@ -45,10 +45,8 @@ int File::overflow_bytes_idx = -1;
int File::timeout_interval_idx = -1;
int File::bof_buffer_size_idx = -1;
int File::bof_buffer_idx = -1;
int File::file_type_idx = -1;
int File::mime_type_idx = -1;
magic_t File::magic = 0;
magic_t File::magic_mime = 0;
string File::salt;
@ -69,10 +67,8 @@ void File::StaticInit()
timeout_interval_idx = Idx("timeout_interval");
bof_buffer_size_idx = Idx("bof_buffer_size");
bof_buffer_idx = Idx("bof_buffer");
file_type_idx = Idx("file_type");
mime_type_idx = Idx("mime_type");
bro_init_magic(&magic, MAGIC_NONE);
bro_init_magic(&magic_mime, MAGIC_MIME);
salt = BifConst::FileAnalysis::salt->CheckString();
@ -247,18 +243,22 @@ bool File::BufferBOF(const u_char* data, uint64 len)
return true;
}
bool File::DetectTypes(const u_char* data, uint64 len)
// Runs libmagic MIME detection (via the magic_mime handle) over the first
// len bytes of data and, when a type is reported, stores it in the
// mime_type field of val with everything from the first ';' onward dropped.
// Returns whether a MIME type was available.
bool File::DetectMIME(const u_char* data, uint64 len)
{
const char* desc = bro_magic_buffer(magic, data, len);
const char* mime = bro_magic_buffer(magic_mime, data, len);
if ( desc )
val->Assign(file_type_idx, new StringVal(desc));
if ( mime )
val->Assign(mime_type_idx, new StringVal(mime));
{
// Find the first ';', which separates the type from any trailing
// parameters (presumably "; charset=..." — confirm against libmagic output).
const char* mime_end = strchr(mime, ';');
return desc || mime;
if ( mime_end )
// strip off charset: keep only the (mime_end - mime) bytes before the ';'
val->Assign(mime_type_idx, new StringVal(mime_end - mime, mime));
else
// no ';' found, so the whole string is stored unchanged
val->Assign(mime_type_idx, new StringVal(mime));
}
// Pointer-to-bool conversion: true exactly when libmagic returned a string.
return mime;
}
void File::ReplayBOF()
@ -276,7 +276,7 @@ void File::ReplayBOF()
BroString* bs = concatenate(bof_buffer.chunks);
val->Assign(bof_buffer_idx, new StringVal(bs));
DetectTypes(bs->Bytes(), bs->Len());
DetectMIME(bs->Bytes(), bs->Len());
FileEvent(file_new);
@ -291,7 +291,7 @@ void File::DataIn(const u_char* data, uint64 len, uint64 offset)
if ( first_chunk )
{
// TODO: this should all really be delayed until we attempt reassembly
DetectTypes(data, len);
DetectMIME(data, len);
FileEvent(file_new);
first_chunk = false;
}
@ -326,7 +326,7 @@ void File::DataIn(const u_char* data, uint64 len)
if ( missed_bof )
{
DetectTypes(data, len);
DetectMIME(data, len);
FileEvent(file_new);
missed_bof = false;
}

View file

@ -170,11 +170,11 @@ protected:
void ReplayBOF();
/**
* Does file/mime type detection and assigns types (if available) to
* corresponding fields in #val.
* @return whether a file or mime type was available.
* Does mime type detection and assigns type (if available) to \c mime_type
* field in #val.
* @return whether mime type was available.
*/
bool DetectTypes(const u_char* data, uint64 len);
bool DetectMIME(const u_char* data, uint64 len);
FileID id; /**< A pretty hash that likely identifies file */
string unique; /**< A string that uniquely identifies file */
@ -207,7 +207,6 @@ protected:
*/
static void StaticInit();
static magic_t magic;
static magic_t magic_mime;
static string salt;
@ -224,7 +223,6 @@ protected:
static int timeout_interval_idx;
static int bof_buffer_size_idx;
static int bof_buffer_idx;
static int file_type_idx;
static int mime_type_idx;
};

View file

@ -3,7 +3,6 @@ Cx92a0ym5R8, 0, 0
FILE_BOF_BUFFER
^J0.26 | 201
FILE_TYPE
file type is set
mime type is set
file_stream, Cx92a0ym5R8, 1500, ^J0.26 | 2012-08-24 15:10:04 -0700^J^J * Fixing update-changes, which could pick the wrong control file. (Robin Sommer)^J^J * Fixing GPG signing script. (Robin Sommer)^J^J0.25 | 2012-08-01 13:55:46 -0500^J^J * Fix configure script to exit with non-zero status on error (Jon Siwek)^J^J0.24 | 2012-07-05 12:50:43 -0700^J^J * Raise minimum required CMake version to 2.6.3 (Jon Siwek)^J^J * Adding script to delete old fully-merged branches. (Robin Sommer)^J^J0.23-2 | 2012-01-25 13:24:01 -0800^J^J * Fix a bro-cut error message. (Daniel Thayer)^J^J0.23 | 2012-01-11 12:16:11 -0800^J^J * Tweaks to release scripts, plus a new one for signing files.^J (Robin Sommer)^J^J0.22 | 2012-01-10 16:45:19 -0800^J^J * Tweaks for OpenBSD support. (Jon Siwek)^J^J * bro-cut extensions and fixes. (Robin Sommer)^J ^J - If no field names are given on the command line, we now pass through^J all fields. Adresses #657.^J^J - Removing some GNUism from awk script. Addresses #653.^J^J - Added option for time output in UTC. Addresses #668.^J^J - Added output field separator option -F. Addresses #649.^J^J - Fixing option -c: only some header lines were passed through^J rather than all. (Robin Sommer)^J^J * Fix parallel make portability. (Jon Siwek)^J^J0.21-9 | 2011-11-07 05:44:14 -0800^J^J * Fixing compiler warnings. Addresses #388. (Jon Siwek)^J^J0.21-2 | 2011-11-02 18:12:13 -0700^J^J * Fix for misnaming temp file in update-changes script. (Robin Sommer)^J^J0.21-1 | 2011-11-02 18:10:39 -0700^J^J * Little fix for make-relea
file_chunk, Cx92a0ym5R8, 1500, 0, ^J0.26 | 2012-08-24 15:10:04 -0700^J^J * Fixing update-changes, which could pick the wrong control file. (Robin Sommer)^J^J * Fixing GPG signing script. (Robin Sommer)^J^J0.25 | 2012-08-01 13:55:46 -0500^J^J * Fix configure script to exit with non-zero status on error (Jon Siwek)^J^J0.24 | 2012-07-05 12:50:43 -0700^J^J * Raise minimum required CMake version to 2.6.3 (Jon Siwek)^J^J * Adding script to delete old fully-merged branches. (Robin Sommer)^J^J0.23-2 | 2012-01-25 13:24:01 -0800^J^J * Fix a bro-cut error message. (Daniel Thayer)^J^J0.23 | 2012-01-11 12:16:11 -0800^J^J * Tweaks to release scripts, plus a new one for signing files.^J (Robin Sommer)^J^J0.22 | 2012-01-10 16:45:19 -0800^J^J * Tweaks for OpenBSD support. (Jon Siwek)^J^J * bro-cut extensions and fixes. (Robin Sommer)^J ^J - If no field names are given on the command line, we now pass through^J all fields. Adresses #657.^J^J - Removing some GNUism from awk script. Addresses #653.^J^J - Added option for time output in UTC. Addresses #668.^J^J - Added output field separator option -F. Addresses #649.^J^J - Fixing option -c: only some header lines were passed through^J rather than all. (Robin Sommer)^J^J * Fix parallel make portability. (Jon Siwek)^J^J0.21-9 | 2011-11-07 05:44:14 -0800^J^J * Fixing compiler warnings. Addresses #388. (Jon Siwek)^J^J0.21-2 | 2011-11-02 18:12:13 -0700^J^J * Fix for misnaming temp file in update-changes script. (Robin Sommer)^J^J0.21-1 | 2011-11-02 18:10:39 -0700^J^J * Little fix for make-relea

View file

@ -1,7 +1,6 @@
FILE_NEW
oDwT1BbzjM1, 0, 0
FILE_TYPE
file type is set
mime type is set
FILE_STATE_REMOVE
oDwT1BbzjM1, 1022920, 0
@ -11,7 +10,6 @@ source: HTTP
FILE_NEW
oDwT1BbzjM1, 0, 0
FILE_TYPE
file type is set
mime type is set
FILE_TIMEOUT
FILE_TIMEOUT

View file

@ -3,7 +3,6 @@ Cx92a0ym5R8, 0, 0
FILE_BOF_BUFFER
^J0.26 | 201
FILE_TYPE
file type is set
mime type is set
FILE_STATE_REMOVE
Cx92a0ym5R8, 4705, 0

View file

@ -3,5 +3,4 @@ Cx92a0ym5R8, 0, 0
FILE_BOF_BUFFER
^J0.26 | 201
FILE_TYPE
file type is set
mime type is set

View file

@ -3,7 +3,6 @@ sidhzrR4IT8, 0, 0
FILE_BOF_BUFFER
The Nationa
FILE_TYPE
file type is set
mime type is set
FILE_STATE_REMOVE
sidhzrR4IT8, 16557, 0

View file

@ -3,7 +3,6 @@ kg59rqyYxN, 0, 0
FILE_BOF_BUFFER
{^J "origin
FILE_TYPE
file type is set
mime type is set
FILE_STATE_REMOVE
kg59rqyYxN, 197, 0

View file

@ -3,7 +3,6 @@ Cx92a0ym5R8, 0, 0
FILE_BOF_BUFFER
^J0.26 | 201
FILE_TYPE
file type is set
mime type is set
FILE_STATE_REMOVE
Cx92a0ym5R8, 4705, 0

View file

@ -1,7 +1,6 @@
FILE_NEW
7gZBKVUgy4l, 0, 0
FILE_TYPE
file type is set
mime type is set
FILE_OVER_NEW_CONNECTION
FILE_STATE_REMOVE

View file

@ -1,7 +1,6 @@
FILE_NEW
oDwT1BbzjM1, 0, 0
FILE_TYPE
file type is set
mime type is set
FILE_STATE_REMOVE
oDwT1BbzjM1, 1022920, 0
@ -11,7 +10,6 @@ source: HTTP
FILE_NEW
oDwT1BbzjM1, 0, 0
FILE_TYPE
file type is set
mime type is set
FILE_TIMEOUT
FILE_STATE_REMOVE

View file

@ -1,7 +1,6 @@
FILE_NEW
uHS14uhRKGe, 0, 0
FILE_TYPE
file type is set
mime type is set
FILE_OVER_NEW_CONNECTION
FILE_STATE_REMOVE

View file

@ -3,7 +3,6 @@ aFQKI8SPOL2, 0, 0
FILE_BOF_BUFFER
/*^J********
FILE_TYPE
file type is set
mime type is set
FILE_STATE_REMOVE
aFQKI8SPOL2, 2675, 0
@ -17,7 +16,6 @@ CCU3vUEr06l, 0, 0
FILE_BOF_BUFFER
//-- Google
FILE_TYPE
file type is set
mime type is set
FILE_STATE_REMOVE
CCU3vUEr06l, 21421, 0
@ -31,7 +29,6 @@ HCzA0dVwDPj, 0, 0
FILE_BOF_BUFFER
GIF89a^D\0^D\0\xb3
FILE_TYPE
file type is set
mime type is set
FILE_STATE_REMOVE
HCzA0dVwDPj, 94, 0
@ -46,7 +43,6 @@ a1Zu1fteVEf, 0, 0
FILE_BOF_BUFFER
\x89PNG^M^J^Z^J\0\0\0
FILE_TYPE
file type is set
mime type is set
FILE_STATE_REMOVE
a1Zu1fteVEf, 2349, 0
@ -61,7 +57,6 @@ xXlF7wFdsR, 0, 0
FILE_BOF_BUFFER
\x89PNG^M^J^Z^J\0\0\0
FILE_TYPE
file type is set
mime type is set
FILE_STATE_REMOVE
xXlF7wFdsR, 27579, 0

View file

@ -3,7 +3,6 @@ v5HLI7MxPQh, 0, 0
FILE_BOF_BUFFER
hello world
FILE_TYPE
file type is set
mime type is set
FILE_STATE_REMOVE
v5HLI7MxPQh, 11, 0
@ -18,7 +17,6 @@ PZS1XGHkIf1, 0, 0
FILE_BOF_BUFFER
{^J "origin
FILE_TYPE
file type is set
mime type is set
FILE_STATE_REMOVE
PZS1XGHkIf1, 366, 0

View file

@ -3,7 +3,6 @@ nYgPNGLrZf9, 0, 0
FILE_BOF_BUFFER
#separator
FILE_TYPE
file type is set
mime type is set
FILE_STATE_REMOVE
nYgPNGLrZf9, 311, 0

View file

@ -3,7 +3,6 @@ wqKMAamJVSb, 0, 0
FILE_BOF_BUFFER
PK^C^D^T\0\0\0^H\0\xae
FILE_TYPE
file type is set
mime type is set
FILE_STATE_REMOVE
wqKMAamJVSb, 42208, 0

View file

@ -3,8 +3,8 @@
#empty_field (empty)
#unset_field -
#path file_analysis
#open 2013-04-04-21-22-26
#fields id parent_id source last_active seen_bytes total_bytes missing_bytes overflow_bytes timeout_interval bof_buffer_size file_type mime_type timedout conn_uids actions_taken extracted_files md5 sha1 sha256
#types string string string time count count count count interval count string string bool table[string] table[enum] table[string] string string string
Cx92a0ym5R8 - HTTP 1362692527.009775 4705 4705 0 0 120.000000 1024 set set F UWkUyAuUGXf FileAnalysis::ACTION_SHA1,FileAnalysis::ACTION_EXTRACT,FileAnalysis::ACTION_DATA_EVENT,FileAnalysis::ACTION_MD5,FileAnalysis::ACTION_SHA256 Cx92a0ym5R8-file 397168fd09991a0e712254df7bc639ac 1dd7ac0398df6cbc0696445a91ec681facf4dc47 4e7c7ef0984119447e743e3ec77e1de52713e345cde03fe7df753a35849bed18
#close 2013-04-04-21-22-26
#open 2013-04-11-17-29-51
#fields id parent_id source last_active seen_bytes total_bytes missing_bytes overflow_bytes timeout_interval bof_buffer_size mime_type timedout conn_uids actions_taken extracted_files md5 sha1 sha256
#types string string string time count count count count interval count string bool table[string] table[enum] table[string] string string string
Cx92a0ym5R8 - HTTP 1362692527.009775 4705 4705 0 0 120.000000 1024 set F UWkUyAuUGXf FileAnalysis::ACTION_SHA1,FileAnalysis::ACTION_EXTRACT,FileAnalysis::ACTION_DATA_EVENT,FileAnalysis::ACTION_MD5,FileAnalysis::ACTION_SHA256 Cx92a0ym5R8-file 397168fd09991a0e712254df7bc639ac 1dd7ac0398df6cbc0696445a91ec681facf4dc47 4e7c7ef0984119447e743e3ec77e1de52713e345cde03fe7df753a35849bed18
#close 2013-04-11-17-29-51

View file

@ -3,7 +3,6 @@ cwR7l6Zctxb, 0, 0
FILE_BOF_BUFFER
Hello^M^J^M^J ^M
FILE_TYPE
file type is set
mime type is set
FILE_STATE_REMOVE
cwR7l6Zctxb, 79, 0
@ -17,7 +16,6 @@ ZAOEQmRyxv1, 0, 0
FILE_BOF_BUFFER
<html xmlns
FILE_TYPE
file type is set
mime type is set
FILE_STATE_REMOVE
ZAOEQmRyxv1, 1918, 0
@ -31,7 +29,6 @@ Ltd7QO7jEv3, 0, 0
FILE_BOF_BUFFER
Version 4.9
FILE_TYPE
file type is set
mime type is set
FILE_STATE_REMOVE
Ltd7QO7jEv3, 10823, 0

View file

@ -10,7 +10,7 @@ BEGIN { FS="\t"; OFS="\t"; type_col = -1; desc_col = -1 }
{
if ( $i == "mime_type" )
type_col = i-1;
if ( $i == "mime_desc" || $i == "file_type" )
if ( $i == "mime_desc" )
desc_col = i-1;
}
}

View file

@ -47,17 +47,11 @@ event file_new(f: fa_file)
print f$bof_buffer[0:10];
}
if ( f?$file_type || f?$mime_type )
print "FILE_TYPE";
# not actually printing the values due to libmagic variances
if ( f?$file_type )
{
print "file type is set";
f$file_type = "set";
}
if ( f?$mime_type )
{
print "FILE_TYPE";
print "mime type is set";
# not actually printing the values due to libmagic variances
f$mime_type = "set";
}
}