zeek/scripts/base/protocols/http/file-ident.bro
Seth Hall 597a4d6704 Hopefully the last major script reorganization.
- policy/ renamed to scripts/

- By default BROPATH now contains:
	- scripts/
	- scripts/policy
	- scripts/site

- *Nearly* all tests pass.

- All of scripts/base/ is loaded by main.cc
	- Can be disabled by setting $BRO_NO_BASE_SCRIPTS
	- Scripts in scripts/base/ don't use relative path loading to ease use of BRO_NO_BASE_SCRIPTS (to copy and paste that script).

- The scripts in scripts/base/protocols/ only (or soon will only) do logging and state building.

- The scripts in scripts/base/frameworks/ add functionality without causing any additional overhead.

- All "detection" activity happens through scripts in scripts/policy/.

- Communications framework modified temporarily to need an environment variable to actually enable (ENABLE_COMMUNICATION=1)
	- This is so the communications framework can be loaded as part
	  of the base without causing trouble when it's not needed.
	- This will be removed once a resolution to ticket #540 is reached.
2011-08-05 23:09:53 -04:00

76 lines
2.2 KiB
Text

##! This script is involved in the identification of file types in HTTP
##! response bodies.
# Add the magic number signatures to the core signature set.
redef signature_files += "base/protocols/http/file-ident.sig";
# Ignore the signatures used to match files
redef Signatures::ignored_ids += /^matchfile-/;
module HTTP;
export {
redef enum Notice::Type += {
# This notice is thrown when the file extension doesn't
# seem to match the file contents.
Incorrect_File_Type,
};
redef record Info += {
## This will record the mime_type identified.
mime_type: string &log &optional;
## This indicates that no data of the current file transfer has been
## seen yet. After the first :bro:id:`http_entity_data` event, it
## will be set to T.
first_chunk: bool &default=T;
};
redef enum Tags += {
IDENTIFIED_FILE
};
# Create regexes that *should* in be in the urls for specifics mime types.
# Notices are thrown if the pattern doesn't match the url for the file type.
const mime_types_extensions: table[string] of pattern = {
["application/x-dosexec"] = /\.([eE][xX][eE]|[dD][lL][lL])/,
} &redef;
}
event signature_match(state: signature_state, msg: string, data: string) &priority=5
{
# Only signatures matching file types are dealt with here.
if ( /^matchfile-/ !in state$sig_id ) return;
local c = state$conn;
set_state(c, F, F);
# Not much point in any of this if we don't know about the HTTP session.
if ( ! c?$http ) return;
# Set the mime type that was detected.
c$http$mime_type = msg;
if ( msg in mime_types_extensions &&
c$http?$uri && mime_types_extensions[msg] !in c$http$uri )
{
local url = build_url_http(c$http);
local message = fmt("%s %s %s", msg, c$http$method, url);
NOTICE([$note=Incorrect_File_Type,
$msg=message,
$conn=c,
$method=c$http$method,
$URL=url]);
}
}
event http_entity_data(c: connection, is_orig: bool, length: count, data: string) &priority=5
{
if ( c$http$first_chunk && ! c$http?$mime_type )
c$http$mime_type = split1(identify_data(data, T), /;/)[1];
}
event http_entity_data(c: connection, is_orig: bool, length: count, data: string) &priority=-10
{
if ( c$http$first_chunk )
c$http$first_chunk=F;
}