mirror of
https://github.com/zeek/zeek.git
synced 2025-10-03 07:08:19 +00:00

- Add a timeout flag to file_analysis.log so it's easy to tell what has had at least one timeout trigger happen.
- Fix ftp-data service tag not being set for reused connections.
- Fix HTTP::Incorrect_File_Type because mime types returned by FAF have the charset still in them, but the HTTP::mime_types_extensions table does not and it requires an exact string match. (still ugly)
- Add TRIGGER_NEW_CONN to track files going over multiple connections.
- Add an initial file/mime type guess for non-linear file transfers.
- Fix a case where file/mime type detection would never be attempted if the start of the file was a content gap.
- Improve mime type tracking of HTTP byte-range/partial-content, even if the requests are pipelined or over multiple connections.
- I changed the modbus.events test because having the baseline output be 80+ MB is nuts and it was sensitive to connection record redefs.
116 lines
3.5 KiB
Text
116 lines
3.5 KiB
Text
##! Identification of file types in HTTP response bodies with file content sniffing.
|
|
|
|
@load base/frameworks/notice
|
|
@load ./main
|
|
@load ./utils
|
|
@load ./file-analysis
|
|
|
|
module HTTP;
|
|
|
|
export {
	redef enum Notice::Type += {
		## Raised when the file extension seen in a request does not
		## appear to agree with the sniffed file contents.
		Incorrect_File_Type,
	};

	redef record Info += {
		## Mime type of the response body, as determined by content
		## sniffing.
		mime_type: string &log &optional;
	};

	## Maps a mime type string (without the character set) to a pattern
	## that the request URI is expected to contain.
	## An :bro:enum:`HTTP::Incorrect_File_Type` notice is generated when
	## the discovered mime type has an entry in this table and the
	## associated pattern is not found in the URI.
	const mime_types_extensions: table[string] of pattern = {
		["application/x-dosexec"] = /\.([eE][xX][eE]|[dD][lL][lL])/,
	} &redef;

	## URLs that are not noteworthy and for which no
	## :bro:enum:`HTTP::Incorrect_File_Type` notice should be created.
	## Each pattern added should match the complete URL (the matched
	## URLs include "http://" at the beginning).
	const ignored_incorrect_file_type_urls = /^$/ &redef;
}
|
|
|
|
# Reacts to TRIGGER_TYPE for files whose source is "HTTP": copies the
# sniffed mime type onto the HTTP state of every connection carrying the
# file, and raises HTTP::Incorrect_File_Type when the request URI does not
# contain the pattern registered for that mime type.
hook FileAnalysis::policy(trig: FileAnalysis::Trigger, info: FileAnalysis::Info)
	&priority=5
	{
	if ( trig != FileAnalysis::TRIGGER_TYPE )
		return;

	# Only HTTP files with a known mime type and a set of connections
	# are of interest.  The checks short-circuit, so optional fields are
	# never read before their presence has been confirmed.
	if ( ! info?$mime_type || ! info?$source || info$source != "HTTP" ||
	     ! info?$conns )
		return;

	for ( id in info$conns )
		{
		local sess: connection = info$conns[id];

		if ( ! sess?$http )
			next;

		sess$http$mime_type = info$mime_type;

		# Keep only what precedes the first ';' so that the lookup key
		# carries no trailing parameters such as the charset.
		local base_mime: string = split1(info$mime_type, /;/)[1];

		# Skip mime types without a registered pattern, requests
		# without a URI, and URIs that already contain the pattern.
		if ( base_mime !in mime_types_extensions || ! sess$http?$uri ||
		     mime_types_extensions[base_mime] in sess$http$uri )
			next;

		local full_url = build_url_http(sess$http);

		# Exact (whole-string) match against the ignore pattern.
		if ( full_url == ignored_incorrect_file_type_urls )
			next;

		NOTICE([$note=Incorrect_File_Type,
		        $msg=fmt("%s %s %s", base_mime, sess$http$method, full_url),
		        $conn=sess]);
		}
	}
|
|
|
|
# Reacts to TRIGGER_NEW_CONN for files whose source is "HTTP" by copying
# the already known mime type onto the HTTP state of each connection that
# carries the file.
hook FileAnalysis::policy(trig: FileAnalysis::Trigger, info: FileAnalysis::Info)
	&priority=5
	{
	# The checks short-circuit, so optional fields are only read once
	# their presence has been confirmed.
	if ( trig != FileAnalysis::TRIGGER_NEW_CONN ||
	     ! info?$mime_type || ! info?$source || info$source != "HTTP" ||
	     ! info?$conns )
		return;

	# Spread the mime around: for partial content, TRIGGER_TYPE only
	# happens once for the first connection, so any subsequent connections
	# that transfer the same file would otherwise be left without a
	# mime_type field.
	for ( id in info$conns )
		{
		local sess: connection = info$conns[id];

		if ( sess?$http )
			sess$http$mime_type = info$mime_type;
		}
	}
|
|
|
|
# Remembers the mime type seen for byte-range requests / partial content
# responses, keyed by the [connection id, uri] pair.  A client can pipeline
# byte-range requests for the same uri over multiple connections; without
# this table only the first request in the pipeline on each connection would
# have its mime_type field filled in (by the FileAnalysis policy hooks).
# Entries are subject to a five minute read expiration.
global partial_types: table[conn_id, string] of string
	&read_expire=5mins;
|
|
|
|
# For range requests with a known URI, either records the message's mime
# type in partial_types or, when the message has none, restores it from
# there.  Uses priority 4 so that it runs before the handler that will
# write to http.log.
event http_message_done(c: connection, is_orig: bool, stat: http_message_stat)
	&priority=4
	{
	# Only byte-range requests that have a URI are tracked.
	if ( ! c$http$range_request || ! c$http?$uri )
		return;

	if ( c$http?$mime_type )
		# Remember the type for later requests on this connection/uri.
		partial_types[c$id, c$http$uri] = c$http$mime_type;
	else if ( [c$id, c$http$uri] in partial_types )
		# No type on this message: reuse the one remembered earlier.
		c$http$mime_type = partial_types[c$id, c$http$uri];
	}
|