Hopefully the last major script reorganization.

- policy/ renamed to scripts/

- By default BROPATH now contains:
	- scripts/
	- scripts/policy
	- scripts/site

- *Nearly* all tests pass.

- All of scripts/base/ is loaded by main.cc
	- Can be disabled by setting $BRO_NO_BASE_SCRIPTS
	- Scripts in scripts/base/ don't use relative path loading to ease use of BRO_NO_BASE_SCRIPTS (to copy and paste that script).

- The scripts in scripts/base/protocols/ only (or soon will only) do logging and state building.

- The scripts in scripts/base/frameworks/ add functionality without causing any additional overhead.

- All "detection" activity happens through scripts in scripts/policy/.

- Communications framework modified temporarily to need an environment variable to actually enable (ENABLE_COMMUNICATION=1)
	- This is so the communications framework can be loaded as part
	  of the base without causing trouble when it's not needed.
	- This will be removed once a resolution to ticket #540 is reached.
This commit is contained in:
Seth Hall 2011-08-05 23:09:53 -04:00
parent 68171cf179
commit 597a4d6704
257 changed files with 1311 additions and 1225 deletions

View file

@ -0,0 +1,5 @@
@load ./main
@load ./utils
@load ./file-ident
@load ./file-hash
@load ./file-extract

View file

@ -0,0 +1,56 @@
##! Extracts the items from HTTP traffic, one per file. At this time only
##! the message body from the server can be extracted with this script.
module HTTP;
export {
## Pattern of file mime types to extract from HTTP entity bodies.
const extract_file_types = /NO_DEFAULT/ &redef;
## The on-disk prefix for files to be extracted from HTTP entity bodies.
const extraction_prefix = "http-item" &redef;
redef record Info += {
## This field can be set per-connection to determine if the entity body
## will be extracted. It must be set to T on or before the first
## entity_body_data event.
extracting_file: bool &default=F;
## This is the holder for the file handle as the file is being written
## to disk.
extraction_file: file &log &optional;
};
redef record State += {
entity_bodies: count &default=0;
};
}
event http_entity_data(c: connection, is_orig: bool, length: count, data: string) &priority=5
{
# Client body extraction is not currently supported in this script.
if ( is_orig || ! c$http$first_chunk ) return;
if ( c$http$first_chunk )
{
if ( c$http?$mime_type &&
extract_file_types in c$http$mime_type )
{
c$http$extracting_file = T;
local suffix = fmt("%s_%d.dat", is_orig ? "orig" : "resp", ++c$http_state$entity_bodies);
local fname = generate_extraction_filename(extraction_prefix, c, suffix);
c$http$extraction_file = open(fname);
enable_raw_output(c$http$extraction_file);
}
}
if ( c$http$extracting_file )
print c$http$extraction_file, data;
}
event http_end_entity(c: connection, is_orig: bool)
{
if ( c$http$extracting_file )
close(c$http$extraction_file);
}

View file

@ -0,0 +1,92 @@
##! Calculate hashes for HTTP body transfers.
module HTTP;
export {
redef enum Notice::Type += {
## Indicates that an MD5 sum was calculated for an HTTP response body.
MD5,
};
redef record Info += {
## The MD5 sum for a file transferred over HTTP will be stored here.
md5: string &log &optional;
## This value can be set per-transfer to determine per request
## if a file should have an MD5 sum generated. It must be
## set to T at the time of or before the first chunk of body data.
calc_md5: bool &default=F;
## This boolean value indicates if an MD5 sum is currently being
## calculated for the current file transfer.
calculating_md5: bool &default=F;
};
## Generate MD5 sums for these filetypes.
const generate_md5 = /application\/x-dosexec/ # Windows and DOS executables
| /application\/x-executable/ # *NIX executable binary
&redef;
}
## Initialize and calculate the hash.
event http_entity_data(c: connection, is_orig: bool, length: count, data: string) &priority=5
{
if ( is_orig || ! c?$http ) return;
if ( c$http$first_chunk )
{
if ( c$http$calc_md5 ||
(c$http?$mime_type && generate_md5 in c$http$mime_type) )
{
c$http$calculating_md5 = T;
md5_hash_init(c$id);
}
}
if ( c$http$calculating_md5 )
md5_hash_update(c$id, data);
}
## In the event of a content gap during a file transfer, detect the state for
## the MD5 sum calculation and stop calculating the MD5 since it would be
## incorrect anyway.
event content_gap(c: connection, is_orig: bool, seq: count, length: count) &priority=5
{
if ( is_orig || ! c?$http ) return;
set_state(c, F, is_orig);
if ( c$http$calculating_md5 )
{
c$http$calculating_md5 = F;
md5_hash_finish(c$id);
}
}
## When the file finishes downloading, finish the hash and generate a notice.
event http_message_done(c: connection, is_orig: bool, stat: http_message_stat) &priority=-3
{
if ( is_orig || ! c?$http ) return;
if ( c$http$calculating_md5 )
{
local url = build_url_http(c$http);
c$http$calculating_md5 = F;
c$http$md5 = md5_hash_finish(c$id);
NOTICE([$note=MD5, $msg=fmt("%s %s %s", c$id$orig_h, c$http$md5, url),
$sub=c$http$md5, $conn=c, $URL=url]);
}
}
event connection_state_remove(c: connection) &priority=-5
{
if ( c?$http_state &&
c$http_state$current_response in c$http_state$pending &&
c$http_state$pending[c$http_state$current_response]$calculating_md5 )
{
# The MD5 sum isn't going to be saved anywhere since the entire
# body wouldn't have been seen anyway and we'd just be giving an
# incorrect MD5 sum.
md5_hash_finish(c$id);
}
}

View file

@ -0,0 +1,76 @@
##! This script is involved in the identification of file types in HTTP
##! response bodies.
# Add the magic number signatures to the core signature set.
redef signature_files += "base/protocols/http/file-ident.sig";
# Ignore the signatures used to match files
redef Signatures::ignored_ids += /^matchfile-/;
module HTTP;
export {
redef enum Notice::Type += {
# This notice is thrown when the file extension doesn't
# seem to match the file contents.
Incorrect_File_Type,
};
redef record Info += {
## This will record the mime_type identified.
mime_type: string &log &optional;
## This indicates that no data of the current file transfer has been
## seen yet. After the first :bro:id:`http_entity_data` event, it
## will be set to T.
first_chunk: bool &default=T;
};
redef enum Tags += {
IDENTIFIED_FILE
};
# Create regexes that *should* in be in the urls for specifics mime types.
# Notices are thrown if the pattern doesn't match the url for the file type.
const mime_types_extensions: table[string] of pattern = {
["application/x-dosexec"] = /\.([eE][xX][eE]|[dD][lL][lL])/,
} &redef;
}
event signature_match(state: signature_state, msg: string, data: string) &priority=5
{
# Only signatures matching file types are dealt with here.
if ( /^matchfile-/ !in state$sig_id ) return;
local c = state$conn;
set_state(c, F, F);
# Not much point in any of this if we don't know about the HTTP session.
if ( ! c?$http ) return;
# Set the mime type that was detected.
c$http$mime_type = msg;
if ( msg in mime_types_extensions &&
c$http?$uri && mime_types_extensions[msg] !in c$http$uri )
{
local url = build_url_http(c$http);
local message = fmt("%s %s %s", msg, c$http$method, url);
NOTICE([$note=Incorrect_File_Type,
$msg=message,
$conn=c,
$method=c$http$method,
$URL=url]);
}
}
event http_entity_data(c: connection, is_orig: bool, length: count, data: string) &priority=5
{
if ( c$http$first_chunk && ! c$http?$mime_type )
c$http$mime_type = split1(identify_data(data, T), /;/)[1];
}
event http_entity_data(c: connection, is_orig: bool, length: count, data: string) &priority=-10
{
if ( c$http$first_chunk )
c$http$first_chunk=F;
}

View file

@ -0,0 +1,144 @@
# These signatures are used as a replacement for libmagic. The signature
# name needs to start with "matchfile" and the "event" directive takes
# the mime type of the file matched by the http-reply-body pattern.
#
# Signatures from: http://www.garykessler.net/library/file_sigs.html
signature matchfile-exe {
http-reply-body /\x4D\x5A/
event "application/x-dosexec"
}
signature matchfile-elf {
http-reply-body /\x7F\x45\x4C\x46/
event "application/x-executable"
}
signature matchfile-script {
# This is meant to match the interpreter declaration at the top of many
# interpreted scripts.
http-reply-body /\#\![[:blank:]]?\//
event "application/x-script"
}
signature matchfile-wmv {
http-reply-body /\x30\x26\xB2\x75\x8E\x66\xCF\x11\xA6\xD9\x00\xAA\x00\x62\xCE\x6C/
event "video/x-ms-wmv"
}
signature matchfile-flv {
http-reply-body /\x46\x4C\x56\x01/
event "video/x-flv"
}
signature matchfile-swf {
http-reply-body /[\x46\x43]\x57\x53/
event "application/x-shockwave-flash"
}
signature matchfile-jar {
http-reply-body /\x5F\x27\xA8\x89/
event "application/java-archive"
}
signature matchfile-class {
http-reply-body /\xCA\xFE\xBA\xBE/
event "application/java-byte-code"
}
signature matchfile-msoffice-2007 {
# MS Office 2007 XML documents
http-reply-body /\x50\x4B\x03\x04\x14\x00\x06\x00/
event "application/msoffice"
}
signature matchfile-msoffice {
# Older MS Office files
http-reply-body /\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1/
event "application/msoffice"
}
signature matchfile-rtf {
http-reply-body /\x7B\x5C\x72\x74\x66\x31/
event "application/rtf"
}
signature matchfile-lnk {
http-reply-body /\x4C\x00\x00\x00\x01\x14\x02\x00\x00\x00\x00\x00\xC0\x00\x00\x00\x00\x00\x00\x46/
event "application/x-ms-shortcut"
}
signature matchfile-torrent {
http-reply-body /\x64\x38\x3A\x61\x6E\x6E\x6F\x75\x6E\x63\x65/
event "application/x-bittorrent"
}
signature matchfile-pdf {
http-reply-body /\x25\x50\x44\x46/
event "application/pdf"
}
signature matchfile-html {
http-reply-body /<[hH][tT][mM][lL]/
event "text/html"
}
signature matchfile-html2 {
http-reply-body /<![dD][oO][cC][tT][yY][pP][eE][[:blank:]][hH][tT][mM][lL]/
event "text/html"
}
signature matchfile-xml {
http-reply-body /<\??[xX][mM][lL]/
event "text/xml"
}
signature matchfile-gif {
http-reply-body /\x47\x49\x46\x38[\x37\x39]\x61/
event "image/gif"
}
signature matchfile-jpg {
http-reply-body /\xFF\xD8\xFF[\xDB\xE0\xE1\xE2\xE3\xE8]..[\x4A\x45\x53][\x46\x78\x50][\x49\x69][\x46\x66]/
event "image/jpeg"
}
signature matchfile-tiff {
http-reply-body /\x4D\x4D\x00[\x2A\x2B]/
event "image/tiff"
}
signature matchfile-png {
http-reply-body /\x89\x50\x4e\x47/
event "image/png"
}
signature matchfile-zip {
http-reply-body /\x50\x4B\x03\x04/
event "application/zip"
}
signature matchfile-bzip {
http-reply-body /\x42\x5A\x68/
event "application/bzip2"
}
signature matchfile-gzip {
http-reply-body /\x1F\x8B\x08/
event "application/x-gzip"
}
signature matchfile-cab {
http-reply-body /\x4D\x53\x43\x46/
event "application/vnd.ms-cab-compressed"
}
signature matchfile-rar {
http-reply-body /\x52\x61\x72\x21\x1A\x07\x00/
event "application/x-rar-compressed"
}
signature matchfile-7z {
http-reply-body /\x37\x7A\xBC\xAF\x27\x1C/
event "application/x-7z-compressed"
}

View file

@ -0,0 +1,247 @@
module HTTP;
export {
redef enum Log::ID += { HTTP };
## Indicate a type of attack or compromise in the record to be logged.
type Tags: enum {
EMPTY
};
## This setting changes if passwords used in Basic-Auth are captured or not.
const default_capture_password = F &redef;
type Info: record {
ts: time &log;
uid: string &log;
id: conn_id &log;
## The verb used in the HTTP request (GET, POST, HEAD, etc.).
method: string &log &optional;
## The value of the HOST header.
host: string &log &optional;
## The URI used in the request.
uri: string &log &optional;
## The value of the "referer" header. The comment is deliberately
## misspelled like the standard declares, but the name used here is
## "referrer" spelled correctly.
referrer: string &log &optional;
## The value of the User-Agent header from the client.
user_agent: string &log &optional;
## The value of the Content-Length header from the client.
request_content_length: count &log &optional;
## The value of the Content-Length header from the server.
response_content_length: count &log &optional;
## The status code returned by the server.
status_code: count &log &optional;
## The status message returned by the server.
status_msg: string &log &optional;
## The filename given in the Content-Disposition header
## sent by the server.
filename: string &log &optional;
## This is a set of indicators of various attributes discovered and
## related to a particular request/response pair.
tags: set[Tags] &log;
## The username if basic-auth is performed for the request.
username: string &log &optional;
## The password if basic-auth is performed for the request.
password: string &log &optional;
## This determines if the password will be captured for this request.
capture_password: bool &default=default_capture_password;
## All of the headers that may indicate if the request was proxied.
proxied: set[string] &log &optional;
};
type State: record {
pending: table[count] of Info;
current_response: count &default=0;
current_request: count &default=0;
};
## The list of HTTP headers typically used to indicate a proxied request.
const proxy_headers: set[string] = {
"FORWARDED",
"X-FORWARDED-FOR",
"X-FORWARDED-FROM",
"CLIENT-IP",
"VIA",
"XROXY-CONNECTION",
"PROXY-CONNECTION",
} &redef;
global log_http: event(rec: Info);
}
# Add the http state tracking fields to the connection record.
redef record connection += {
http: Info &optional;
http_state: State &optional;
};
# Initialize the HTTP logging stream.
event bro_init() &priority=5
{
Log::create_stream(HTTP, [$columns=Info, $ev=log_http]);
}
# DPD configuration.
const ports = {
80/tcp, 81/tcp, 631/tcp, 1080/tcp, 3138/tcp,
8000/tcp, 8080/tcp, 8888/tcp,
};
redef dpd_config += {
[[ANALYZER_HTTP, ANALYZER_HTTP_BINPAC]] = [$ports = ports],
};
redef capture_filters += {
["http"] = "tcp and port (80 or 81 or 631 or 1080 or 3138 or 8000 or 8080 or 8888)"
};
function new_http_session(c: connection): Info
{
local tmp: Info;
tmp$ts=network_time();
tmp$uid=c$uid;
tmp$id=c$id;
return tmp;
}
function set_state(c: connection, request: bool, is_orig: bool)
{
if ( ! c?$http_state )
{
local s: State;
c$http_state = s;
}
# These deal with new requests and responses.
if ( request || c$http_state$current_request !in c$http_state$pending )
c$http_state$pending[c$http_state$current_request] = new_http_session(c);
if ( ! is_orig && c$http_state$current_response !in c$http_state$pending )
c$http_state$pending[c$http_state$current_response] = new_http_session(c);
if ( is_orig )
c$http = c$http_state$pending[c$http_state$current_request];
else
c$http = c$http_state$pending[c$http_state$current_response];
}
event http_request(c: connection, method: string, original_URI: string,
unescaped_URI: string, version: string) &priority=5
{
if ( ! c?$http_state )
{
local s: State;
c$http_state = s;
}
++c$http_state$current_request;
set_state(c, T, T);
c$http$method = method;
c$http$uri = unescaped_URI;
}
event http_reply(c: connection, version: string, code: count, reason: string) &priority=5
{
if ( ! c?$http_state )
{
local s: State;
c$http_state = s;
}
++c$http_state$current_response;
set_state(c, F, F);
c$http$status_code = code;
c$http$status_msg = reason;
}
event http_header(c: connection, is_orig: bool, name: string, value: string) &priority=5
{
set_state(c, F, is_orig);
if ( is_orig ) # client headers
{
if ( name == "REFERER" )
c$http$referrer = value;
else if ( name == "HOST" )
# The split is done to remove the occasional port value that shows up here.
c$http$host = split1(value, /:/)[1];
else if ( name == "CONTENT-LENGTH" )
c$http$request_content_length = extract_count(value);
else if ( name == "USER-AGENT" )
c$http$user_agent = value;
else if ( name in proxy_headers )
{
if ( ! c$http?$proxied )
c$http$proxied = set();
add c$http$proxied[fmt("%s -> %s", name, value)];
}
else if ( name == "AUTHORIZATION" )
{
if ( /^[bB][aA][sS][iI][cC] / in value )
{
local userpass = decode_base64(sub(value, /[bB][aA][sS][iI][cC][[:blank:]]/, ""));
local up = split(userpass, /:/);
if ( |up| >= 2 )
{
c$http$username = up[1];
if ( c$http$capture_password )
c$http$password = up[2];
}
else
{
c$http$username = "<problem-decoding>";
if ( c$http$capture_password )
c$http$password = userpass;
}
}
}
}
else # server headers
{
if ( name == "CONTENT-LENGTH" )
c$http$response_content_length = extract_count(value);
else if ( name == "CONTENT-DISPOSITION" &&
/[fF][iI][lL][eE][nN][aA][mM][eE]/ in value )
c$http$filename = sub(value, /^.*[fF][iI][lL][eE][nN][aA][mM][eE]=/, "");
}
}
event http_message_done(c: connection, is_orig: bool, stat: http_message_stat) &priority = 5
{
set_state(c, F, is_orig);
}
event http_message_done(c: connection, is_orig: bool, stat: http_message_stat) &priority = -5
{
# The reply body is done so we're ready to log.
if ( ! is_orig )
{
Log::write(HTTP, c$http);
delete c$http_state$pending[c$http_state$current_response];
}
}
event connection_state_remove(c: connection)
{
# Flush all pending but incomplete request/response pairs.
if ( c?$http_state )
{
for ( r in c$http_state$pending )
{
Log::write(HTTP, c$http_state$pending[r]);
}
}
}

View file

@ -0,0 +1,94 @@
##! This script makes it possible for the HTTP analysis scripts to analyze
##! the apparent normal case of "206 Partial Content" responses.
##!
##! This script doesn't work yet and isn't loaded by default.
module HTTP;
export {
redef enum Notice::Type += {
Partial_Content_Out_Of_Order,
};
type Range: record {
from: count;
to: count;
} &log;
redef record Info += {
current_range: count &default=0;
request_ranges: vector of Range &optional;
response_range: Range &optional;
};
## Index is client IP address, server IP address, and URL being requested. The
## URL is tracked as part of the index in case multiple partial content segmented
## files are being transferred simultaneously between the server and client.
global partial_content_files: table[addr, addr, string] of Info &read_expire=5mins &redef;
}
event http_header(c: connection, is_orig: bool, name: string, value: string) &priority=2
{
local parts: table[count] of string;
if ( is_orig && name == "RANGE" )
{
# Example --> Range: bytes=1-1,2336-4951
parts = split(value, /[=]/);
if ( 2 in parts )
{
local ranges = split(parts[2], /,/);
for ( i in ranges )
{
if ( ! c$http?$request_ranges )
c$http$request_ranges = vector();
parts = split(ranges[i], /-/);
local r: Range = [$from=extract_count(parts[1]), $to=extract_count(parts[2])];
print r;
c$http$request_ranges[|c$http$request_ranges|] = r;
}
}
}
else if ( ! is_orig && name == "CONTENT-RANGE" )
{
# Example --> Content-Range: bytes 2336-4951/489528
parts = split(value, /[0-9]*/);
c$http$response_range = [$from=extract_count(parts[2]), $to=extract_count(parts[4])];
}
}
event http_reply(c: connection, version: string, code: count, reason: string) &priority=5
{
if ( code != 206 || ! c$http?$request_ranges )
return;
local url = build_url(c$http);
if ( [c$id$orig_h, c$id$resp_h, url] !in partial_content_files )
{
partial_content_files[c$id$orig_h, c$id$resp_h, url] = copy(c$http);
}
}
event http_entity_data(c: connection, is_orig: bool, length: count, data: string)
{
if ( is_orig || c$http$status_code != 206 || ! c$http?$request_ranges )
return;
local url = build_url(c$http);
local http = partial_content_files[c$id$orig_h, c$id$resp_h, url];
local range = http$request_ranges[http$current_range];
print http$current_range;
if ( http$current_range == 0 &&
c$http$response_range$from == 0 )
{
print "correct file beginning!";
}
}
event http_end_entity(c: connection, is_orig: bool)
{
print "end entity";
++c$http$current_range;
}

View file

@ -0,0 +1,38 @@
##! Utilities specific for HTTP processing.
module HTTP;
export {
global extract_keys: function(data: string, kv_splitter: pattern): string_vec;
global build_url: function(rec: Info): string;
global build_url_http: function(rec: Info): string;
}
function extract_keys(data: string, kv_splitter: pattern): string_vec
{
local key_vec: vector of string = vector();
local parts = split(data, kv_splitter);
for ( part_index in parts )
{
local key_val = split1(parts[part_index], /=/);
if ( 1 in key_val )
key_vec[|key_vec|] = key_val[1];
}
return key_vec;
}
function build_url(rec: Info): string
{
local uri = rec?$uri ? rec$uri : "/<missed_request>";
local host = rec?$host ? rec$host : fmt("%s", rec$id$resp_h);
if ( rec$id$resp_p != 80/tcp )
host = fmt("%s:%s", host, rec$id$resp_p);
return fmt("%s%s", host, uri);
}
function build_url_http(rec: Info): string
{
return fmt("http://%s", build_url(rec));
}