Merge remote-tracking branch 'origin/master' into topic/robin/plugins

Thanks to git this merge was less troublesome than I was afraid it
would be. Not all tests pass yet though (and file hashes have changed
unfortunately).

Conflicts:
	cmake
	doc/scripts/DocSourcesList.cmake
	scripts/base/init-bare.bro
	scripts/base/protocols/ftp/main.bro
	scripts/base/protocols/irc/dcc-send.bro
	scripts/test-all-policy.bro
	src/AnalyzerTags.h
	src/CMakeLists.txt
	src/analyzer/Analyzer.cc
	src/analyzer/protocol/file/File.cc
	src/analyzer/protocol/file/File.h
	src/analyzer/protocol/http/HTTP.cc
	src/analyzer/protocol/http/HTTP.h
	src/analyzer/protocol/mime/MIME.cc
	src/event.bif
	src/main.cc
	src/util-config.h.in
	testing/btest/Baseline/coverage.bare-load-baseline/canonified_loaded_scripts.log
	testing/btest/Baseline/coverage.default-load-baseline/canonified_loaded_scripts.log
	testing/btest/Baseline/istate.events-ssl/receiver.http.log
	testing/btest/Baseline/istate.events-ssl/sender.http.log
	testing/btest/Baseline/istate.events/receiver.http.log
	testing/btest/Baseline/istate.events/sender.http.log
This commit is contained in:
Robin Sommer 2013-05-16 17:58:48 -07:00
commit eb637f9f3e
411 changed files with 240276 additions and 161868 deletions

View file

@ -1,5 +1,6 @@
@load ./main
@load ./utils
@load ./file-analysis
@load ./file-ident
@load ./file-hash
@load ./file-extract

View file

@ -0,0 +1,31 @@
@load ./main
@load ./utils
@load base/utils/conn-ids
@load base/frameworks/file-analysis/main
module HTTP;
export {
## Default file handle provider for HTTP.
##
## c: The connection whose HTTP state is used to build the handle.
##
## is_orig: Direction flag that is embedded verbatim in the handle
##          (presumably T when the originator is sending the file --
##          confirm against the file analysis framework).
##
## Returns: A string handle for the file being transferred, or an empty
##          string if the connection carries no HTTP state.
global get_file_handle: function(c: connection, is_orig: bool): string;
}
# Builds the string handle used to identify a file carried over HTTP.
# Returns the empty string when the connection has no HTTP state attached.
function get_file_handle(c: connection, is_orig: bool): string
	{
	# Without HTTP state there is nothing to derive a handle from.
	if ( ! c?$http )
		return "";

	# Ordinary transfer: the handle is made of the connection start time,
	# the direction, the transaction depth and the connection ID string.
	if ( ! c$http$range_request )
		return cat(Analyzer::ANALYZER_HTTP, " ", c$start_time, " ", is_orig,
		           " ", c$http$trans_depth, " ", id_string(c$id));

	# Range request: the handle is made of the direction, the originator
	# address and the URL only, with no per-connection component
	# (presumably so that pieces of one file fetched over several
	# connections share a handle -- confirm with the framework docs).
	return cat(Analyzer::ANALYZER_HTTP, " ", is_orig, " ", c$id$orig_h, " ",
	           build_url(c$http));
	}
module GLOBAL;
# Answers file handle requests for the HTTP analyzer; requests tagged with
# any other analyzer are ignored here.
event get_file_handle(tag: Analyzer::Tag, c: connection, is_orig: bool)
	{
	if ( tag == Analyzer::ANALYZER_HTTP )
		set_file_handle(HTTP::get_file_handle(c, is_orig));
	}

View file

@ -2,8 +2,7 @@
##! the message body from the server can be extracted with this script.
@load ./main
@load ./file-ident
@load base/utils/files
@load ./file-analysis
module HTTP;
@ -16,45 +15,70 @@ export {
redef record Info += {
## On-disk file where the response body was extracted to.
extraction_file: file &log &optional;
extraction_file: string &log &optional;
## Indicates if the response body is to be extracted or not. Must be
## set before or by the first :bro:id:`http_entity_data` event for the
## content.
## set before or by the first :bro:see:`file_new` for the file content.
extract_file: bool &default=F;
};
}
event http_entity_data(c: connection, is_orig: bool, length: count, data: string) &priority=-5
global extract_count: count = 0;
function get_extraction_name(f: fa_file): string
{
# Client body extraction is not currently supported in this script.
if ( is_orig )
return;
if ( c$http$first_chunk )
local r = fmt("%s-%s-%d.dat", extraction_prefix, f$id, extract_count);
++extract_count;
return r;
}
event file_new(f: fa_file) &priority=5
{
if ( ! f?$source ) return;
if ( f$source != "HTTP" ) return;
if ( ! f?$conns ) return;
local fname: string;
local c: connection;
if ( f?$mime_type && extract_file_types in f$mime_type )
{
if ( c$http?$mime_type &&
extract_file_types in c$http$mime_type )
fname = get_extraction_name(f);
FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_EXTRACT,
$extract_filename=fname]);
for ( cid in f$conns )
{
c$http$extract_file = T;
}
if ( c$http$extract_file )
{
local suffix = fmt("%s_%d.dat", is_orig ? "orig" : "resp", c$http_state$current_response);
local fname = generate_extraction_filename(extraction_prefix, c, suffix);
c$http$extraction_file = open(fname);
enable_raw_output(c$http$extraction_file);
c = f$conns[cid];
if ( ! c?$http ) next;
c$http$extraction_file = fname;
}
return;
}
if ( c$http?$extraction_file )
print c$http$extraction_file, data;
}
local extracting: bool = F;
event http_end_entity(c: connection, is_orig: bool)
{
if ( c$http?$extraction_file )
close(c$http$extraction_file);
for ( cid in f$conns )
{
c = f$conns[cid];
if ( ! c?$http ) next;
if ( ! c$http$extract_file ) next;
fname = get_extraction_name(f);
FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_EXTRACT,
$extract_filename=fname]);
extracting = T;
break;
}
if ( extracting )
for ( cid in f$conns )
{
c = f$conns[cid];
if ( ! c?$http ) next;
c$http$extraction_file = fname;
}
}

View file

@ -1,15 +1,11 @@
##! Calculate hashes for HTTP body transfers.
@load ./file-ident
@load ./main
@load ./file-analysis
module HTTP;
export {
redef enum Notice::Type += {
## Indicates that an MD5 sum was calculated for an HTTP response body.
MD5,
};
redef record Info += {
## MD5 sum for a file transferred over HTTP calculated from the
## response body.
@ -19,10 +15,6 @@ export {
## if a file should have an MD5 sum generated. It must be
## set to T at the time of or before the first chunk of body data.
calc_md5: bool &default=F;
## Indicates if an MD5 sum is being calculated for the current
## request/response pair.
md5_handle: opaque of md5 &optional;
};
## Generate MD5 sums for these filetypes.
@ -31,62 +23,46 @@ export {
&redef;
}
## Initialize and calculate the hash.
event http_entity_data(c: connection, is_orig: bool, length: count, data: string) &priority=5
event file_new(f: fa_file) &priority=5
{
if ( is_orig || ! c?$http ) return;
if ( c$http$first_chunk )
{
if ( c$http$calc_md5 ||
(c$http?$mime_type && generate_md5 in c$http$mime_type) )
{
c$http$md5_handle = md5_hash_init();
}
}
if ( c$http?$md5_handle )
md5_hash_update(c$http$md5_handle, data);
}
## In the event of a content gap during a file transfer, detect the state for
## the MD5 sum calculation and stop calculating the MD5 since it would be
## incorrect anyway.
event content_gap(c: connection, is_orig: bool, seq: count, length: count) &priority=5
{
if ( is_orig || ! c?$http || ! c$http?$md5_handle ) return;
set_state(c, F, is_orig);
md5_hash_finish(c$http$md5_handle); # Ignore return value.
delete c$http$md5_handle;
}
if ( ! f?$source ) return;
if ( f$source != "HTTP" ) return;
## When the file finishes downloading, finish the hash and generate a notice.
event http_message_done(c: connection, is_orig: bool, stat: http_message_stat) &priority=-3
{
if ( is_orig || ! c?$http ) return;
if ( c$http?$md5_handle )
if ( f?$mime_type && generate_md5 in f$mime_type )
{
local url = build_url_http(c$http);
c$http$md5 = md5_hash_finish(c$http$md5_handle);
delete c$http$md5_handle;
NOTICE([$note=MD5, $msg=fmt("%s %s %s", c$id$orig_h, c$http$md5, url),
$sub=c$http$md5, $conn=c]);
FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_MD5]);
return;
}
if ( ! f?$conns ) return;
for ( cid in f$conns )
{
local c: connection = f$conns[cid];
if ( ! c?$http ) next;
if ( ! c$http$calc_md5 ) next;
FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_MD5]);
return;
}
}
event connection_state_remove(c: connection) &priority=-5
event file_state_remove(f: fa_file) &priority=4
{
if ( c?$http_state &&
c$http_state$current_response in c$http_state$pending &&
c$http_state$pending[c$http_state$current_response]?$md5_handle )
if ( ! f?$source ) return;
if ( f$source != "HTTP" ) return;
if ( ! f?$conns ) return;
if ( ! f?$info ) return;
if ( ! f$info?$md5 ) return;
for ( cid in f$conns )
{
# The MD5 sum isn't going to be saved anywhere since the entire
# body wouldn't have been seen anyway and we'd just be giving an
# incorrect MD5 sum.
md5_hash_finish(c$http$md5_handle);
delete c$http$md5_handle;
local c: connection = f$conns[cid];
if ( ! c?$http ) next;
c$http$md5 = f$info$md5;
}
}

View file

@ -1,37 +1,28 @@
##! Identification of file types in HTTP response bodies with file content sniffing.
@load base/frameworks/signatures
@load base/frameworks/notice
@load ./main
@load ./utils
# Add the magic number signatures to the core signature set.
@load-sigs ./file-ident.sig
# Ignore the signatures used to match files
redef Signatures::ignored_ids += /^matchfile-/;
@load ./file-analysis
module HTTP;
export {
redef enum Notice::Type += {
## Indicates when the file extension doesn't seem to match the file contents.
## Indicates when the file extension doesn't seem to match the file
## contents.
Incorrect_File_Type,
};
redef record Info += {
## Mime type of response body identified by content sniffing.
mime_type: string &log &optional;
## Indicates that no data of the current file transfer has been
## seen yet. After the first :bro:id:`http_entity_data` event, it
## will be set to F.
first_chunk: bool &default=T;
};
## Mapping between mime types and regular expressions for URLs
## The :bro:enum:`HTTP::Incorrect_File_Type` notice is generated if the pattern
## doesn't match the mime type that was discovered.
## Mapping between mime type strings (without character set) and
## regular expressions for URLs.
## The :bro:enum:`HTTP::Incorrect_File_Type` notice is generated if the
## pattern doesn't match the mime type that was discovered.
const mime_types_extensions: table[string] of pattern = {
["application/x-dosexec"] = /\.([eE][xX][eE]|[dD][lL][lL])/,
} &redef;
@ -43,43 +34,72 @@ export {
const ignored_incorrect_file_type_urls = /^$/ &redef;
}
event signature_match(state: signature_state, msg: string, data: string) &priority=5
event file_new(f: fa_file) &priority=5
{
# Only signatures matching file types are dealt with here.
if ( /^matchfile-/ !in state$sig_id ) return;
if ( ! f?$source ) return;
if ( f$source != "HTTP" ) return;
if ( ! f?$mime_type ) return;
if ( ! f?$conns ) return;
local c = state$conn;
set_state(c, F, F);
# Not much point in any of this if we don't know about the HTTP session.
if ( ! c?$http ) return;
# Set the mime type that was detected.
c$http$mime_type = msg;
if ( msg in mime_types_extensions &&
c$http?$uri && mime_types_extensions[msg] !in c$http$uri )
for ( cid in f$conns )
{
local c: connection = f$conns[cid];
if ( ! c?$http ) next;
c$http$mime_type = f$mime_type;
local mime_str: string = c$http$mime_type;
if ( mime_str !in mime_types_extensions ) next;
if ( ! c$http?$uri ) next;
if ( mime_types_extensions[mime_str] in c$http$uri ) next;
local url = build_url_http(c$http);
if ( url == ignored_incorrect_file_type_urls )
return;
local message = fmt("%s %s %s", msg, c$http$method, url);
if ( url == ignored_incorrect_file_type_urls ) next;
local message = fmt("%s %s %s", mime_str, c$http$method, url);
NOTICE([$note=Incorrect_File_Type,
$msg=message,
$conn=c]);
}
}
event http_entity_data(c: connection, is_orig: bool, length: count, data: string) &priority=5
event file_over_new_connection(f: fa_file, c: connection) &priority=5
{
if ( c$http$first_chunk && ! c$http?$mime_type )
c$http$mime_type = split1(identify_data(data, T), /;/)[1];
if ( ! f?$source ) return;
if ( f$source != "HTTP" ) return;
if ( ! f?$mime_type ) return;
if ( ! c?$http ) return;
# Spread the mime around (e.g. for partial content, file_type event only
# happens once for the first connection, but if there are subsequent
# connections to transfer the same file, they'll be lacking the mime_type
# field if we don't do this).
c$http$mime_type = f$mime_type;
}
event http_entity_data(c: connection, is_orig: bool, length: count, data: string) &priority=-10
# Tracks byte-range request / partial content response mime types, indexed
# by [connection, uri] pairs. This is needed because a person can pipeline
# byte-range requests over multiple connections to the same uri. Without
# the tracking, only the first request in the pipeline for each connection
# would get a mime_type field assigned to it (by the FileAnalysis policy hooks).
global partial_types: table[conn_id, string] of string &read_expire=5mins;
# Priority 4 so that it runs before the handler that will write to http.log.
event http_message_done(c: connection, is_orig: bool, stat: http_message_stat)
&priority=4
{
if ( c$http$first_chunk )
c$http$first_chunk=F;
if ( ! c$http$range_request ) return;
if ( ! c$http?$uri ) return;
if ( c$http?$mime_type )
{
partial_types[c$id, c$http$uri] = c$http$mime_type;
return;
}
if ( [c$id, c$http$uri] in partial_types )
c$http$mime_type = partial_types[c$id, c$http$uri];
}

View file

@ -1,144 +0,0 @@
# These signatures are used as a replacement for libmagic. The signature
# name needs to start with "matchfile" and the "event" directive takes
# the mime type of the file matched by the http-reply-body pattern.
#
# Signatures from: http://www.garykessler.net/library/file_sigs.html
signature matchfile-exe {
http-reply-body /\x4D\x5A/
event "application/x-dosexec"
}
signature matchfile-elf {
http-reply-body /\x7F\x45\x4C\x46/
event "application/x-executable"
}
signature matchfile-script {
# This is meant to match the interpreter declaration at the top of many
# interpreted scripts.
http-reply-body /\#\![[:blank:]]?\//
event "application/x-script"
}
signature matchfile-wmv {
http-reply-body /\x30\x26\xB2\x75\x8E\x66\xCF\x11\xA6\xD9\x00\xAA\x00\x62\xCE\x6C/
event "video/x-ms-wmv"
}
signature matchfile-flv {
http-reply-body /\x46\x4C\x56\x01/
event "video/x-flv"
}
signature matchfile-swf {
http-reply-body /[\x46\x43]\x57\x53/
event "application/x-shockwave-flash"
}
signature matchfile-jar {
http-reply-body /\x5F\x27\xA8\x89/
event "application/java-archive"
}
signature matchfile-class {
http-reply-body /\xCA\xFE\xBA\xBE/
event "application/java-byte-code"
}
signature matchfile-msoffice-2007 {
# MS Office 2007 XML documents
http-reply-body /\x50\x4B\x03\x04\x14\x00\x06\x00/
event "application/msoffice"
}
signature matchfile-msoffice {
# Older MS Office files
http-reply-body /\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1/
event "application/msoffice"
}
signature matchfile-rtf {
http-reply-body /\x7B\x5C\x72\x74\x66\x31/
event "application/rtf"
}
signature matchfile-lnk {
http-reply-body /\x4C\x00\x00\x00\x01\x14\x02\x00\x00\x00\x00\x00\xC0\x00\x00\x00\x00\x00\x00\x46/
event "application/x-ms-shortcut"
}
signature matchfile-torrent {
http-reply-body /\x64\x38\x3A\x61\x6E\x6E\x6F\x75\x6E\x63\x65/
event "application/x-bittorrent"
}
signature matchfile-pdf {
http-reply-body /\x25\x50\x44\x46/
event "application/pdf"
}
signature matchfile-html {
http-reply-body /<[hH][tT][mM][lL]/
event "text/html"
}
signature matchfile-html2 {
http-reply-body /<![dD][oO][cC][tT][yY][pP][eE][[:blank:]][hH][tT][mM][lL]/
event "text/html"
}
signature matchfile-xml {
http-reply-body /<\??[xX][mM][lL]/
event "text/xml"
}
signature matchfile-gif {
http-reply-body /\x47\x49\x46\x38[\x37\x39]\x61/
event "image/gif"
}
signature matchfile-jpg {
http-reply-body /\xFF\xD8\xFF[\xDB\xE0\xE1\xE2\xE3\xE8]..[\x4A\x45\x53][\x46\x78\x50][\x49\x69][\x46\x66]/
event "image/jpeg"
}
signature matchfile-tiff {
http-reply-body /\x4D\x4D\x00[\x2A\x2B]/
event "image/tiff"
}
signature matchfile-png {
http-reply-body /\x89\x50\x4e\x47/
event "image/png"
}
signature matchfile-zip {
http-reply-body /\x50\x4B\x03\x04/
event "application/zip"
}
signature matchfile-bzip {
http-reply-body /\x42\x5A\x68/
event "application/bzip2"
}
signature matchfile-gzip {
http-reply-body /\x1F\x8B\x08/
event "application/x-gzip"
}
signature matchfile-cab {
http-reply-body /\x4D\x53\x43\x46/
event "application/vnd.ms-cab-compressed"
}
signature matchfile-rar {
http-reply-body /\x52\x61\x72\x21\x1A\x07\x00/
event "application/x-rar-compressed"
}
signature matchfile-7z {
http-reply-body /\x37\x7A\xBC\xAF\x27\x1C/
event "application/x-7z-compressed"
}

View file

@ -71,6 +71,10 @@ export {
## All of the headers that may indicate if the request was proxied.
proxied: set[string] &log &optional;
## Indicates if this request can assume 206 partial content in
## response.
range_request: bool &default=F;
};
## Structure to maintain state for an HTTP connection with multiple
@ -232,6 +236,9 @@ event http_header(c: connection, is_orig: bool, name: string, value: string) &pr
else if ( name == "HOST" )
# The split is done to remove the occasional port value that shows up here.
c$http$host = split1(value, /:/)[1];
else if ( name == "RANGE" )
c$http$range_request = T;
else if ( name == "USER-AGENT" )
c$http$user_agent = value;