# Mirror of https://github.com/zeek/zeek.git (synced 2025-10-02 14:48:21 +00:00).
# The file_new event now takes over the function of file_type, file_bof, and
# file_bof_buffer.
# 110 lines, 3.3 KiB, text.
##! Identification of file types in HTTP response bodies with file content sniffing.
|
|
|
|
@load base/frameworks/notice
|
|
@load ./main
|
|
@load ./utils
|
|
@load ./file-analysis
|
|
|
|
# Declarations that follow are placed in the HTTP module's namespace.
module HTTP;
|
|
|
|
export {
	redef enum Notice::Type += {
		## Raised when a file's extension does not appear to agree with
		## the type determined from the file's contents.
		Incorrect_File_Type,
	};

	redef record Info += {
		## Mime type of the response body, as identified by content
		## sniffing.
		mime_type: string &log &optional;
	};

	## Maps a mime type string (with the character set removed) to the
	## regular expression that URLs of that type are expected to match.
	## When the pattern does not match for the mime type that was
	## discovered, the :bro:enum:`HTTP::Incorrect_File_Type` notice is
	## generated.
	const mime_types_extensions: table[string] of pattern = {
		["application/x-dosexec"] = /\.([eE][xX][eE]|[dD][lL][lL])/,
	} &redef;

	## URLs matching this pattern are considered not noteworthy and are
	## filtered out before an :bro:enum:`HTTP::Incorrect_File_Type`
	## notice is created.  Every pattern added here should match the
	## complete URL; the URLs being matched start with "http://".
	const ignored_incorrect_file_type_urls: pattern = /^$/ &redef;
}
|
|
|
|
# Handles a newly seen file whose source is "HTTP" and that carries a mime
# type: the mime type is recorded on the HTTP state of every connection
# associated with the file, and an Incorrect_File_Type notice is raised for
# each connection whose request URI does not match the pattern registered for
# that mime type in mime_types_extensions (unless the full URL matches
# ignored_incorrect_file_type_urls).
event file_new(f: fa_file) &priority=5
	{
	if ( ! f?$source ) return;
	if ( f$source != "HTTP" ) return;
	if ( ! f?$mime_type ) return;
	if ( ! f?$conns ) return;

	# The mime type belongs to the file rather than to a connection, so
	# cut off anything after the first ';' and look the type up once
	# instead of repeating both steps on every loop iteration.
	local mime_str: string = split1(f$mime_type, /;/)[1];
	local has_ext_pattern: bool = mime_str in mime_types_extensions;

	for ( cid in f$conns )
		{
		local c: connection = f$conns[cid];

		if ( ! c?$http ) next;

		# Always record the type, even when no extension check applies.
		c$http$mime_type = f$mime_type;

		if ( ! has_ext_pattern ) next;
		if ( ! c$http?$uri ) next;

		# The URI carries an extension expected for this mime type.
		if ( mime_types_extensions[mime_str] in c$http$uri ) next;

		local url = build_url_http(c$http);

		# "==" requires the pattern to match the complete URL.
		if ( url == ignored_incorrect_file_type_urls ) next;

		# Guard the method like the other fields read above, so that an
		# unset value cannot raise a runtime error and abort the handler
		# before the notice is raised.
		# NOTE(review): assumes method is an &optional field of
		# HTTP::Info -- confirm against ./main.
		local http_method: string =
			c$http?$method ? c$http$method : "<unknown>";

		local message = fmt("%s %s %s", mime_str, http_method, url);
		NOTICE([$note=Incorrect_File_Type,
		        $msg=message,
		        $conn=c]);
		}
	}
|
|
|
|
# Copies the mime type of an "HTTP"-sourced file onto the HTTP state of each
# connection currently associated with that file.
event file_over_new_connection(f: fa_file) &priority=5
	{
	if ( ! f?$source || f$source != "HTTP" )
		return;

	if ( ! f?$mime_type || ! f?$conns )
		return;

	# Propagate the mime type to all connections.  With partial content,
	# for example, the file_new event only happens once, for the first
	# connection; later connections transferring the same file would end
	# up without a mime_type field if it were not copied here.
	for ( id in f$conns )
		{
		local conn: connection = f$conns[id];

		if ( conn?$http )
			conn$http$mime_type = f$mime_type;
		}
	}
|
|
|
|
# Remembers the mime type seen for byte-range requests / partial content
# responses, keyed by [connection, uri] pairs.  A client may pipeline
# byte-range requests for the same uri across multiple connections; without
# this table only the first request in each connection's pipeline would end
# up with a mime_type field (the one assigned by the file analysis event
# handlers).  Entries expire five minutes after they were last read.
global partial_types: table[conn_id, string] of string
	&read_expire=5mins;
|
|
|
|
# Runs at priority 4 in order to execute ahead of the handler that writes
# http.log.  For range requests with a known uri, it either stores the
# message's mime type in partial_types or, when the message has none, fills
# it in from a previously stored entry for the same [connection, uri] pair.
event http_message_done(c: connection, is_orig: bool, stat: http_message_stat)
	&priority=4
	{
	if ( ! c$http$range_request || ! c$http?$uri )
		return;

	# A mime type is already present: remember it for later requests.
	# Otherwise, reuse a remembered one if there is any.
	if ( c$http?$mime_type )
		partial_types[c$id, c$http$uri] = c$http$mime_type;
	else if ( [c$id, c$http$uri] in partial_types )
		c$http$mime_type = partial_types[c$id, c$http$uri];
	}
|