mirror of
https://github.com/zeek/zeek.git
synced 2025-10-15 04:58:21 +00:00
94 lines
3 KiB
Text
94 lines
3 KiB
Text
##! This script is involved in the identification of file types in HTTP
|
|
##! response bodies.
|
|
|
|
# TODO:
|
|
# * Add a filter for configurably extracting certain file types into their own log?
|
|
|
|
@load http/base
|
|
@load http/utils
|
|
|
|
@load notice
|
|
@load signatures
|
|
|
|
module HTTP;
|
|
|
|
redef enum Notice::Type += {
	# Raised when the extension seen in the URL does not appear to
	# agree with the file type detected from the file contents.
	HTTP_IncorrectFileType,
};
|
|
|
|
redef record Info += {
	## MIME type reported by a matching file-identification signature.
	## Left unset when no such signature matched.
	mime_type: string &log &optional;
};
|
|
|
|
|
|
export {
	redef enum Tags += { IDENTIFIED_FILE };

	# MIME type detection inspects the body, so logging *can't* happen
	# until after the body has been seen. Short bodies may fit entirely
	# in a single packet, and the http_entity_done event currently fires
	# before the signatures that match HTTP body content, so the log
	# point is pushed back accordingly.
	# TODO: should there be another log point for "after X body bytes"?
	redef default_log_point = AFTER_REPLY;

	# MIME types that this script should identify and log.
	const watched_mime_types = /application\/x-dosexec/
	                         | /application\/x-executable/ &redef;

	# TODO This may be better done with a filter.
	# URLs included here are not logged and notices are not thrown.
	# Take care when defining regexes to not be overly broad.
	#const ignored_uris = /^http:\/\/(au\.|www\.)?download\.windowsupdate\.com\/msdownload\/update/ &redef;

	# Patterns that *should* be present in the URL for specific MIME
	# types, indexed by MIME type. A notice is thrown when the pattern
	# for a detected file type does not match the URL.
	const mime_types_extensions: table[string] of pattern = {
		["application/x-dosexec"] = /\.([eE][xX][eE]|[dD][lL][lL])/,
	} &redef;
}
|
|
|
|
# Add the signature file used for file identification.
redef signature_files += "http/file-ident.sig";

# Signatures whose id begins with "matchfile-" exist only to match files,
# so they are added to the set of ignored signature ids.
redef Signatures::ignored_ids += /^matchfile-/;
|
|
|
|
# Handles matches of the file-identification signatures (ids beginning with
# "matchfile-"). For such a match it records the detected MIME type on the
# HTTP session, raises file_transferred for other scripts, and throws an
# HTTP_IncorrectFileType notice when the URI lacks the pattern registered
# for that MIME type in mime_types_extensions.
#
# state: Signature match state; supplies the signature id and connection.
# msg:   Message attached to the signature; used here as the MIME type.
# data:  Handed unchanged to the file_transferred event.
event signature_match(state: signature_state, msg: string, data: string) &priority=5
	{
	# Only signatures matching file types are dealt with here.
	if ( /^matchfile-/ !in state$sig_id )
		return;

	local c = state$conn;

	set_state(c, F, F);

	# Not much point in any of this if we don't know about the HTTP session.
	if ( ! c?$http )
		return;

	# Set the mime type that was detected.
	c$http$mime_type = msg;

	# Fire the file_transferred event so that it can be picked up by other
	# scripts, like the http/file-hash script since that uses file type to
	# conditionally calculate an MD5 sum.
	# TODO: We are leaving the descr field blank for now, but it shouldn't
	#       matter too much and hopefully the more generic file analysis code
	#       will make this completely irrelevant.
	event file_transferred(c, data, "", msg);

	if ( msg in mime_types_extensions &&
	     c$http?$uri && mime_types_extensions[msg] !in c$http$uri )
		{
		# Reading an unset optional record field is a runtime error that
		# would abort this handler before the notice is raised, so the
		# method is checked the same way the URI is above.
		# NOTE(review): assumes `method` is &optional on the HTTP record,
		# like `uri` -- confirm against http/base. The check is harmless
		# if the field is always present.
		local req_method = c$http?$method ? c$http$method : "<unknown>";
		local url = build_url(c$http);
		local message = fmt("%s %s %s", msg, req_method, url);

		NOTICE([$note=HTTP_IncorrectFileType,
		        $msg=message,
		        $conn=c,
		        $method=req_method,
		        $URL=url]);
		}
	}
|