Large overhaul in name and appearance for file analysis.

This commit is contained in:
Seth Hall 2013-07-05 02:00:14 -04:00
parent caf61f619b
commit df2841458d
39 changed files with 420 additions and 875 deletions

View file

@ -1,6 +1,6 @@
@load ./main
@load ./utils
@load ./file-analysis
@load ./file-ident
@load ./file-hash
@load ./file-extract
#@load ./file-ident
#@load ./file-hash
#@load ./file-extract

View file

@ -1,53 +1,58 @@
@load ./main
@load ./utils
@load base/utils/conn-ids
@load base/frameworks/file-analysis/main
@load base/frameworks/files
module HTTP;
export {
redef record HTTP::Info += {
## Number of MIME entities in the HTTP request message body so far.
request_mime_level: count &default=0;
## Number of MIME entities in the HTTP response message body so far.
response_mime_level: count &default=0;
redef record Info += {
## The sniffed mime type of the data being sent by the client.
client_mime_type: string &log &optional;
## The sniffed mime type of the data being returned by the server.
mime_type: string &log &optional;
};
## Default file handle provider for HTTP.
global get_file_handle: function(c: connection, is_orig: bool): string;
}
event http_begin_entity(c: connection, is_orig: bool) &priority=5
{
if ( ! c?$http ) return;
if ( is_orig )
++c$http$request_mime_level;
else
++c$http$response_mime_level;
}
function get_file_handle(c: connection, is_orig: bool): string
{
if ( ! c?$http ) return "";
local mime_level: count =
is_orig ? c$http$request_mime_level : c$http$response_mime_level;
local mime_level_str: string = mime_level > 1 ? cat(mime_level) : "";
if ( ! c?$http )
return "";
local mime_depth = is_orig ? c$http$orig_mime_depth : c$http$resp_mime_depth;
if ( c$http$range_request )
return cat(ANALYZER_HTTP, " ", is_orig, " ", c$id$orig_h, " ",
build_url(c$http));
return cat(ANALYZER_HTTP, " ", c$start_time, " ", is_orig, " ",
c$http$trans_depth, mime_level_str, " ", id_string(c$id));
{
return cat(ANALYZER_HTTP, is_orig, c$id$orig_h, mime_depth, build_url(c$http));
}
else
{
return cat(ANALYZER_HTTP, c$start_time, is_orig,
c$http$trans_depth, mime_depth, id_string(c$id));
}
}
module GLOBAL;
event get_file_handle(tag: AnalyzerTag, c: connection, is_orig: bool)
&priority=5
event bro_init() &priority=5
{
if ( tag != ANALYZER_HTTP ) return;
set_file_handle(HTTP::get_file_handle(c, is_orig));
Files::register_protocol(ANALYZER_HTTP, HTTP::get_file_handle);
}
event file_over_new_connection(f: fa_file, c: connection) &priority=5
{
if ( c?$http )
{
#if (!f?$mime_type)
# print f;
#
#if ( f$is_orig )
# c$http$client_mime_type = f$mime_type;
#else
# c$http$mime_type = f$mime_type;
if ( c$http?$filename )
f$info$filename = c$http$filename;
}
}

View file

@ -1,100 +0,0 @@
##! Extracts the items from HTTP traffic, one per file. At this time only
##! the message body from the server can be extracted with this script.
@load ./main
@load ./file-analysis
module HTTP;
export {
## Pattern of file mime types to extract from HTTP response entity bodies.
const extract_file_types = /NO_DEFAULT/ &redef;
## The on-disk prefix for files to be extracted from HTTP entity bodies.
const extraction_prefix = "http-item" &redef;
redef record Info += {
## On-disk location where files in request body were extracted.
extracted_request_files: vector of string &log &optional;
## On-disk location where files in response body were extracted.
extracted_response_files: vector of string &log &optional;
## Indicates if the response body is to be extracted or not. Must be
## set before or by the first :bro:see:`file_new` for the file content.
extract_file: bool &default=F;
};
}
function get_extraction_name(f: fa_file): string
{
local r = fmt("%s-%s.dat", extraction_prefix, f$id);
return r;
}
function add_extraction_file(c: connection, is_orig: bool, fn: string)
{
if ( is_orig )
{
if ( ! c$http?$extracted_request_files )
c$http$extracted_request_files = vector();
c$http$extracted_request_files[|c$http$extracted_request_files|] = fn;
}
else
{
if ( ! c$http?$extracted_response_files )
c$http$extracted_response_files = vector();
c$http$extracted_response_files[|c$http$extracted_response_files|] = fn;
}
}
event file_new(f: fa_file) &priority=5
{
if ( ! f?$source ) return;
if ( f$source != "HTTP" ) return;
if ( ! f?$conns ) return;
local fname: string;
local c: connection;
if ( f?$mime_type && extract_file_types in f$mime_type )
{
fname = get_extraction_name(f);
FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_EXTRACT,
$extract_filename=fname]);
for ( cid in f$conns )
{
c = f$conns[cid];
if ( ! c?$http ) next;
add_extraction_file(c, f$is_orig, fname);
}
return;
}
local extracting: bool = F;
for ( cid in f$conns )
{
c = f$conns[cid];
if ( ! c?$http ) next;
if ( ! c$http$extract_file ) next;
fname = get_extraction_name(f);
FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_EXTRACT,
$extract_filename=fname]);
extracting = T;
break;
}
if ( extracting )
for ( cid in f$conns )
{
c = f$conns[cid];
if ( ! c?$http ) next;
add_extraction_file(c, f$is_orig, fname);
}
}

View file

@ -1,68 +0,0 @@
##! Calculate hashes for HTTP body transfers.
@load ./main
@load ./file-analysis
module HTTP;
export {
redef record Info += {
## MD5 sum for a file transferred over HTTP calculated from the
## response body.
md5: string &log &optional;
## This value can be set per-transfer to determine per request
## if a file should have an MD5 sum generated. It must be
## set to T at the time of or before the first chunk of body data.
calc_md5: bool &default=F;
};
## Generate MD5 sums for these filetypes.
const generate_md5 = /application\/x-dosexec/ # Windows and DOS executables
| /application\/x-executable/ # *NIX executable binary
&redef;
}
event file_new(f: fa_file) &priority=5
{
if ( ! f?$source ) return;
if ( f$source != "HTTP" ) return;
if ( f?$mime_type && generate_md5 in f$mime_type )
{
FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_MD5]);
return;
}
if ( ! f?$conns ) return;
for ( cid in f$conns )
{
local c: connection = f$conns[cid];
if ( ! c?$http ) next;
if ( ! c$http$calc_md5 ) next;
FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_MD5]);
return;
}
}
event file_state_remove(f: fa_file) &priority=4
{
if ( ! f?$source ) return;
if ( f$source != "HTTP" ) return;
if ( ! f?$conns ) return;
if ( ! f?$info ) return;
if ( ! f$info?$md5 ) return;
for ( cid in f$conns )
{
local c: connection = f$conns[cid];
if ( ! c?$http ) next;
c$http$md5 = f$info$md5;
}
}

View file

@ -1,105 +0,0 @@
##! Identification of file types in HTTP response bodies with file content sniffing.
@load base/frameworks/notice
@load ./main
@load ./utils
@load ./file-analysis
module HTTP;
export {
redef enum Notice::Type += {
## Indicates when the file extension doesn't seem to match the file
## contents.
Incorrect_File_Type,
};
redef record Info += {
## Mime type of response body identified by content sniffing.
mime_type: string &log &optional;
};
## Mapping between mime type strings (without character set) and
## regular expressions for URLs.
## The :bro:enum:`HTTP::Incorrect_File_Type` notice is generated if the
## pattern doesn't match the mime type that was discovered.
const mime_types_extensions: table[string] of pattern = {
["application/x-dosexec"] = /\.([eE][xX][eE]|[dD][lL][lL])/,
} &redef;
## A pattern for filtering out :bro:enum:`HTTP::Incorrect_File_Type` urls
## that are not noteworthy before a notice is created. Each
## pattern added should match the complete URL (the matched URLs include
## "http://" at the beginning).
const ignored_incorrect_file_type_urls = /^$/ &redef;
}
event file_new(f: fa_file) &priority=5
{
if ( ! f?$source ) return;
if ( f$source != "HTTP" ) return;
if ( ! f?$mime_type ) return;
if ( ! f?$conns ) return;
for ( cid in f$conns )
{
local c: connection = f$conns[cid];
if ( ! c?$http ) next;
c$http$mime_type = f$mime_type;
local mime_str: string = c$http$mime_type;
if ( mime_str !in mime_types_extensions ) next;
if ( ! c$http?$uri ) next;
if ( mime_types_extensions[mime_str] in c$http$uri ) next;
local url = build_url_http(c$http);
if ( url == ignored_incorrect_file_type_urls ) next;
local message = fmt("%s %s %s", mime_str, c$http$method, url);
NOTICE([$note=Incorrect_File_Type,
$msg=message,
$conn=c]);
}
}
event file_over_new_connection(f: fa_file, c: connection) &priority=5
{
if ( ! f?$source ) return;
if ( f$source != "HTTP" ) return;
if ( ! f?$mime_type ) return;
if ( ! c?$http ) return;
# Spread the mime around (e.g. for partial content, file_type event only
# happens once for the first connection, but if there's subsequent
# connections to transfer the same file, they'll be lacking the mime_type
# field if we don't do this).
c$http$mime_type = f$mime_type;
}
# Tracks byte-range request / partial content response mime types, indexed
# by [connection, uri] pairs. This is needed because a person can pipeline
# byte-range requests over multiple connections to the same uri. Without
# the tracking, only the first request in the pipeline for each connection
# would get a mime_type field assigned to it (by the FileAnalysis policy hooks).
global partial_types: table[conn_id, string] of string &read_expire=5mins;
# Priority 4 so that it runs before the handler that will write to http.log.
event http_message_done(c: connection, is_orig: bool, stat: http_message_stat)
&priority=4
{
if ( ! c$http$range_request ) return;
if ( ! c$http?$uri ) return;
if ( c$http?$mime_type )
{
partial_types[c$id, c$http$uri] = c$http$mime_type;
return;
}
if ( [c$id, c$http$uri] in partial_types )
c$http$mime_type = partial_types[c$id, c$http$uri];
}

View file

@ -71,10 +71,14 @@ export {
## All of the headers that may indicate if the request was proxied.
proxied: set[string] &log &optional;
## Indicates if this request can assume 206 partial content in
## response.
range_request: bool &default=F;
range_request: bool &default=F;
## Number of MIME entities in the HTTP request message body so far.
orig_mime_depth: count &default=0;
## Number of MIME entities in the HTTP response message body so far.
resp_mime_depth: count &default=0;
};
## Structure to maintain state for an HTTP connection with multiple
@ -283,6 +287,16 @@ event http_header(c: connection, is_orig: bool, name: string, value: string) &pr
}
}
event http_begin_entity(c: connection, is_orig: bool) &priority=5
{
set_state(c, F, is_orig);
if ( is_orig )
++c$http$orig_mime_depth;
else
++c$http$resp_mime_depth;
}
event http_message_done(c: connection, is_orig: bool, stat: http_message_stat) &priority = 5
{
set_state(c, F, is_orig);