FileAnalysis: replace script-layer http file analysis.

Other misc:

- Remove HTTP::MD5 notice.

- Add "last_active" field to FileAnalysis::Info record.

- Replace "conn_uids", "conn_ids" fields in FileAnalysis::Info record
  with just a "conns" fields containing full connection records.

- The http-methods unit test is failing now, but I think it will be
  fixed once I change the file handle callback mechanism to use events
  instead.
This commit is contained in:
Jon Siwek 2013-03-22 16:14:06 -05:00
parent 7034785810
commit 71f0e2d276
61 changed files with 411 additions and 625 deletions

View file

@ -2,8 +2,7 @@
##! the message body from the server can be extracted with this script.
@load ./main
@load ./file-ident
@load base/utils/files
@load ./file-analysis
module HTTP;
@ -16,45 +15,77 @@ export {
redef record Info += {
## On-disk file where the response body was extracted to.
extraction_file: file &log &optional;
extraction_file: string &log &optional;
## Indicates if the response body is to be extracted or not. Must be
## set before or by the first :bro:id:`http_entity_data` event for the
## content.
## set before or by the first :bro:enum:`FileAnalysis::TRIGGER_NEW`
## for the file content.
extract_file: bool &default=F;
};
}
event http_entity_data(c: connection, is_orig: bool, length: count, data: string) &priority=-5
global extract_count: count = 0;
hook FileAnalysis::policy(trig: FileAnalysis::Trigger, info: FileAnalysis::Info)
&priority=5
{
# Client body extraction is not currently supported in this script.
if ( is_orig )
return;
if ( c$http$first_chunk )
if ( trig != FileAnalysis::TRIGGER_TYPE ) return;
if ( ! info?$mime_type ) return;
if ( ! info?$source ) return;
if ( info$source != "HTTP" ) return;
if ( extract_file_types !in info$mime_type ) return;
for ( act in info$actions )
if ( act$act == FileAnalysis::ACTION_EXTRACT ) return;
local fname: string = fmt("%s-%s-%d.dat", extraction_prefix, info$file_id,
extract_count);
++extract_count;
FileAnalysis::add_action(info$file_id, [$act=FileAnalysis::ACTION_EXTRACT,
$extract_filename=fname]);
if ( ! info?$conns ) return;
for ( cid in info$conns )
{
if ( c$http?$mime_type &&
extract_file_types in c$http$mime_type )
{
c$http$extract_file = T;
}
local c: connection = info$conns[cid];
if ( ! c?$http ) next;
c$http$extraction_file = fname;
}
}
hook FileAnalysis::policy(trig: FileAnalysis::Trigger, info: FileAnalysis::Info)
&priority=5
{
if ( trig != FileAnalysis::TRIGGER_NEW ) return;
if ( ! info?$source ) return;
if ( info$source != "HTTP" ) return;
if ( ! info?$conns ) return;
local fname: string = fmt("%s-%s-%d.dat", extraction_prefix, info$file_id,
extract_count);
local extracting: bool = F;
for ( cid in info$conns )
{
local c: connection = info$conns[cid];
if ( ! c?$http ) next;
if ( c$http$extract_file )
{
local suffix = fmt("%s_%d.dat", is_orig ? "orig" : "resp", c$http_state$current_response);
local fname = generate_extraction_filename(extraction_prefix, c, suffix);
c$http$extraction_file = open(fname);
enable_raw_output(c$http$extraction_file);
if ( ! extracting )
{
FileAnalysis::add_action(info$file_id,
[$act=FileAnalysis::ACTION_EXTRACT,
$extract_filename=fname]);
extracting = T;
++extract_count;
}
c$http$extraction_file = fname;
}
}
if ( c$http?$extraction_file )
print c$http$extraction_file, data;
}
event http_end_entity(c: connection, is_orig: bool)
{
if ( c$http?$extraction_file )
close(c$http$extraction_file);
}

View file

@ -1,15 +1,11 @@
##! Calculate hashes for HTTP body transfers.
@load ./file-ident
@load ./main
@load ./file-analysis
module HTTP;
export {
redef enum Notice::Type += {
## Indicates that an MD5 sum was calculated for an HTTP response body.
MD5,
};
redef record Info += {
## MD5 sum for a file transferred over HTTP calculated from the
## response body.
@ -19,10 +15,6 @@ export {
## if a file should have an MD5 sum generated. It must be
## set to T at the time of or before the first chunk of body data.
calc_md5: bool &default=F;
## Indicates if an MD5 sum is being calculated for the current
## request/response pair.
md5_handle: opaque of md5 &optional;
};
## Generate MD5 sums for these filetypes.
@ -31,62 +23,67 @@ export {
&redef;
}
## Initialize and calculate the hash.
event http_entity_data(c: connection, is_orig: bool, length: count, data: string) &priority=5
hook FileAnalysis::policy(trig: FileAnalysis::Trigger, info: FileAnalysis::Info)
&priority=5
{
if ( is_orig || ! c?$http ) return;
if ( c$http$first_chunk )
if ( trig != FileAnalysis::TRIGGER_TYPE ) return;
if ( ! info?$mime_type ) return;
if ( ! info?$source ) return;
if ( info$source != "HTTP" ) return;
if ( generate_md5 in info$mime_type )
FileAnalysis::add_action(info$file_id, [$act=FileAnalysis::ACTION_MD5]);
else if ( info?$conns )
{
if ( c$http$calc_md5 ||
(c$http?$mime_type && generate_md5 in c$http$mime_type) )
for ( cid in info$conns )
{
c$http$md5_handle = md5_hash_init();
local c: connection = info$conns[cid];
if ( ! c?$http ) next;
if ( c$http$calc_md5 )
{
FileAnalysis::add_action(info$file_id,
[$act=FileAnalysis::ACTION_MD5]);
return;
}
}
}
if ( c$http?$md5_handle )
md5_hash_update(c$http$md5_handle, data);
}
## In the event of a content gap during a file transfer, detect the state for
## the MD5 sum calculation and stop calculating the MD5 since it would be
## incorrect anyway.
event content_gap(c: connection, is_orig: bool, seq: count, length: count) &priority=5
{
if ( is_orig || ! c?$http || ! c$http?$md5_handle ) return;
set_state(c, F, is_orig);
md5_hash_finish(c$http$md5_handle); # Ignore return value.
delete c$http$md5_handle;
}
## When the file finishes downloading, finish the hash and generate a notice.
event http_message_done(c: connection, is_orig: bool, stat: http_message_stat) &priority=-3
hook FileAnalysis::policy(trig: FileAnalysis::Trigger, info: FileAnalysis::Info)
&priority=5
{
if ( is_orig || ! c?$http ) return;
if ( c$http?$md5_handle )
if ( trig != FileAnalysis::TRIGGER_DONE &&
trig != FileAnalysis::TRIGGER_EOF ) return;
if ( ! info?$source ) return;
if ( info$source != "HTTP" ) return;
if ( ! info?$conns ) return;
local act: FileAnalysis::ActionArgs = [$act=FileAnalysis::ACTION_MD5];
if ( act !in info$actions ) return;
local result = info$actions[act];
if ( ! result?$md5 ) return;
for ( cid in info$conns )
{
local url = build_url_http(c$http);
c$http$md5 = md5_hash_finish(c$http$md5_handle);
delete c$http$md5_handle;
NOTICE([$note=MD5, $msg=fmt("%s %s %s", c$id$orig_h, c$http$md5, url),
$sub=c$http$md5, $conn=c]);
local c: connection = info$conns[cid];
if ( ! c?$http ) next;
c$http$md5 = result$md5;
}
}
event connection_state_remove(c: connection) &priority=-5
hook FileAnalysis::policy(trig: FileAnalysis::Trigger, info: FileAnalysis::Info)
&priority=5
{
if ( c?$http_state &&
c$http_state$current_response in c$http_state$pending &&
c$http_state$pending[c$http_state$current_response]?$md5_handle )
{
# The MD5 sum isn't going to be saved anywhere since the entire
# body wouldn't have been seen anyway and we'd just be giving an
# incorrect MD5 sum.
md5_hash_finish(c$http$md5_handle);
delete c$http$md5_handle;
}
if ( trig != FileAnalysis::TRIGGER_GAP ) return;
if ( ! info?$source ) return;
if ( info$source != "HTTP" ) return;
FileAnalysis::add_action(info$file_id, [$act=FileAnalysis::ACTION_MD5]);
}

View file

@ -1,15 +1,9 @@
##! Identification of file types in HTTP response bodies with file content sniffing.
@load base/frameworks/signatures
@load base/frameworks/notice
@load ./main
@load ./utils
# Add the magic number signatures to the core signature set.
@load-sigs ./file-ident.sig
# Ignore the signatures used to match files
redef Signatures::ignored_ids += /^matchfile-/;
@load ./file-analysis
module HTTP;
@ -22,11 +16,6 @@ export {
redef record Info += {
## Mime type of response body identified by content sniffing.
mime_type: string &log &optional;
## Indicates that no data of the current file transfer has been
## seen yet. After the first :bro:id:`http_entity_data` event, it
## will be set to F.
first_chunk: bool &default=T;
};
## Mapping between mime types and regular expressions for URLs
@ -43,43 +32,34 @@ export {
const ignored_incorrect_file_type_urls = /^$/ &redef;
}
event signature_match(state: signature_state, msg: string, data: string) &priority=5
hook FileAnalysis::policy(trig: FileAnalysis::Trigger, info: FileAnalysis::Info)
&priority=5
{
# Only signatures matching file types are dealt with here.
if ( /^matchfile-/ !in state$sig_id ) return;
if ( trig != FileAnalysis::TRIGGER_TYPE ) return;
if ( ! info?$mime_type ) return;
if ( ! info?$source ) return;
if ( info$source != "HTTP" ) return;
if ( ! info?$conns ) return;
local c = state$conn;
set_state(c, F, F);
# Not much point in any of this if we don't know about the HTTP session.
if ( ! c?$http ) return;
# Set the mime type that was detected.
c$http$mime_type = msg;
if ( msg in mime_types_extensions &&
c$http?$uri && mime_types_extensions[msg] !in c$http$uri )
for ( cid in info$conns )
{
local c: connection = info$conns[cid];
if ( ! c?$http ) next;
c$http$mime_type = info$mime_type;
if ( info$mime_type !in mime_types_extensions ) next;
if ( ! c$http?$uri ) next;
if ( mime_types_extensions[info$mime_type] in c$http$uri ) next;
local url = build_url_http(c$http);
if ( url == ignored_incorrect_file_type_urls )
return;
local message = fmt("%s %s %s", msg, c$http$method, url);
if ( url == ignored_incorrect_file_type_urls ) next;
local message = fmt("%s %s %s", info$mime_type, c$http$method, url);
NOTICE([$note=Incorrect_File_Type,
$msg=message,
$conn=c]);
}
}
event http_entity_data(c: connection, is_orig: bool, length: count, data: string) &priority=5
{
if ( c$http$first_chunk && ! c$http?$mime_type )
c$http$mime_type = split1(identify_data(data, T), /;/)[1];
}
event http_entity_data(c: connection, is_orig: bool, length: count, data: string) &priority=-10
{
if ( c$http$first_chunk )
c$http$first_chunk=F;
}

View file

@ -1,144 +0,0 @@
# These signatures are used as a replacement for libmagic. The signature
# name needs to start with "matchfile" and the "event" directive takes
# the mime type of the file matched by the http-reply-body pattern.
#
# Signatures from: http://www.garykessler.net/library/file_sigs.html
signature matchfile-exe {
http-reply-body /\x4D\x5A/
event "application/x-dosexec"
}
signature matchfile-elf {
http-reply-body /\x7F\x45\x4C\x46/
event "application/x-executable"
}
signature matchfile-script {
# This is meant to match the interpreter declaration at the top of many
# interpreted scripts.
http-reply-body /\#\![[:blank:]]?\//
event "application/x-script"
}
signature matchfile-wmv {
http-reply-body /\x30\x26\xB2\x75\x8E\x66\xCF\x11\xA6\xD9\x00\xAA\x00\x62\xCE\x6C/
event "video/x-ms-wmv"
}
signature matchfile-flv {
http-reply-body /\x46\x4C\x56\x01/
event "video/x-flv"
}
signature matchfile-swf {
http-reply-body /[\x46\x43]\x57\x53/
event "application/x-shockwave-flash"
}
signature matchfile-jar {
http-reply-body /\x5F\x27\xA8\x89/
event "application/java-archive"
}
signature matchfile-class {
http-reply-body /\xCA\xFE\xBA\xBE/
event "application/java-byte-code"
}
signature matchfile-msoffice-2007 {
# MS Office 2007 XML documents
http-reply-body /\x50\x4B\x03\x04\x14\x00\x06\x00/
event "application/msoffice"
}
signature matchfile-msoffice {
# Older MS Office files
http-reply-body /\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1/
event "application/msoffice"
}
signature matchfile-rtf {
http-reply-body /\x7B\x5C\x72\x74\x66\x31/
event "application/rtf"
}
signature matchfile-lnk {
http-reply-body /\x4C\x00\x00\x00\x01\x14\x02\x00\x00\x00\x00\x00\xC0\x00\x00\x00\x00\x00\x00\x46/
event "application/x-ms-shortcut"
}
signature matchfile-torrent {
http-reply-body /\x64\x38\x3A\x61\x6E\x6E\x6F\x75\x6E\x63\x65/
event "application/x-bittorrent"
}
signature matchfile-pdf {
http-reply-body /\x25\x50\x44\x46/
event "application/pdf"
}
signature matchfile-html {
http-reply-body /<[hH][tT][mM][lL]/
event "text/html"
}
signature matchfile-html2 {
http-reply-body /<![dD][oO][cC][tT][yY][pP][eE][[:blank:]][hH][tT][mM][lL]/
event "text/html"
}
signature matchfile-xml {
http-reply-body /<\??[xX][mM][lL]/
event "text/xml"
}
signature matchfile-gif {
http-reply-body /\x47\x49\x46\x38[\x37\x39]\x61/
event "image/gif"
}
signature matchfile-jpg {
http-reply-body /\xFF\xD8\xFF[\xDB\xE0\xE1\xE2\xE3\xE8]..[\x4A\x45\x53][\x46\x78\x50][\x49\x69][\x46\x66]/
event "image/jpeg"
}
signature matchfile-tiff {
http-reply-body /\x4D\x4D\x00[\x2A\x2B]/
event "image/tiff"
}
signature matchfile-png {
http-reply-body /\x89\x50\x4e\x47/
event "image/png"
}
signature matchfile-zip {
http-reply-body /\x50\x4B\x03\x04/
event "application/zip"
}
signature matchfile-bzip {
http-reply-body /\x42\x5A\x68/
event "application/bzip2"
}
signature matchfile-gzip {
http-reply-body /\x1F\x8B\x08/
event "application/x-gzip"
}
signature matchfile-cab {
http-reply-body /\x4D\x53\x43\x46/
event "application/vnd.ms-cab-compressed"
}
signature matchfile-rar {
http-reply-body /\x52\x61\x72\x21\x1A\x07\x00/
event "application/x-rar-compressed"
}
signature matchfile-7z {
http-reply-body /\x37\x7A\xBC\xAF\x27\x1C/
event "application/x-7z-compressed"
}