More flexible default mime file extraction and hashing.

This commit is contained in:
Seth Hall 2011-04-15 16:48:08 -04:00
parent 3f1bc7dc4e
commit f522b7085e
5 changed files with 50 additions and 10 deletions

View file

@ -11,6 +11,21 @@ function is_string_binary(s: string): bool
{
return byte_len(gsub(s, /[\x00-\x7f]/, "")) * 100 / |s| >= 25;
}
## Joins every member of a string set into a single string, placing the
## separator between consecutive members.
##
## ss: The set of strings to join.
##
## j: The separator inserted between members.
##
## Returns: The joined string.  Members appear in whatever order the loop
##          visits the set; an empty set produces "".
function join_string_set(ss: set[string], j: string): string
	{
	local joined = "";
	local first_member = T;

	for ( member in ss )
		{
		# Only members after the first are preceded by the separator.
		joined = first_member ? cat(joined, member) : cat(joined, j, member);
		first_member = F;
		}

	return joined;
	}
# Given an arbitrary string, this should extract a single directory.
# TODO: Make this work on Windows-style directories.

View file

@ -1,4 +1,6 @@
@load functions
# TODO: need to figure out a way for these scripts to play along better.
@load smtp
# NOTES:
@ -31,6 +33,7 @@ export {
## This is the timestamp of when the MIME content transfer began.
ts: time &log;
id: conn_id &log;
app_protocol: string &log &optional;
filename: string &log &optional;
## Track how many bytes of the MIME encoded file have been seen.
content_len: count &log &default=0;
@ -85,6 +88,9 @@ event mime_begin_entity(c: connection) &priority=10
set_session(c, T);
++c$mime_state$level;
if ( |c$service| > 0 )
c$mime$app_protocol = join_string_set(c$service, ",");
}
# This has priority 1 because other handlers need to know the current

View file

@ -1,9 +1,13 @@
@load mime/file-ident
module MIME;
export {
## The default setting for extracting files to disk.
const default_extract_file = F &redef;
## Pattern of file mime types to extract from MIME bodies.
const extract_file_types = /NO_DEFAULT/ &redef;
## The on-disk prefix for files to be extracted from MIME entity bodies.
const extraction_prefix = "mime-item" &redef;
redef record Info += {
## The name of the file where this MIME entity is written.
@ -11,18 +15,26 @@ export {
## Optionally write the file to disk. Must be set prior to first
## data chunk being seen in an event.
extract_file: bool &default=default_extract_file;
extract_file: bool &default=F;
## Store the file handle here for the file currently being extracted.
file_handle: file &optional;
};
}
event mime_segment_data(c: connection, length: count, data: string) &priority=4
# Marks the current MIME entity for extraction to disk when its mime type
# matches the extract_file_types pattern.
#
# c: The connection carrying the MIME entity; its c$mime record is updated.
# length, data: The current body chunk (not used by this handler).
event mime_segment_data(c: connection, length: count, data: string) &priority=5
	{
	# mime_type is not assigned anywhere in this script (presumably it is
	# filled in by mime/file-ident -- confirm), so it may still be unset
	# when this handler runs.  Check for its presence before matching
	# instead of reading a field that was never set.
	if ( c$mime?$mime_type && extract_file_types in c$mime$mime_type )
		c$mime$extract_file = T;
	}
event mime_segment_data(c: connection, length: count, data: string) &priority=3
{
if ( c$mime$extract_file && c$mime$content_len == 0 )
{
c$mime$on_disk_filename = fmt("mimefile.%s-%d", id_string(c$id), c$mime_state$level);
c$mime$on_disk_filename = fmt("%s.%s-%d", extraction_prefix,
id_string(c$id),
c$mime_state$level);
c$mime$file_handle = open(c$mime$on_disk_filename);
enable_raw_output(c$mime$file_handle);
}

View file

@ -1,20 +1,27 @@
@load mime/file-ident
module MIME;
export {
## The default setting for calculating MD5 sums on files transferred.
const default_calc_md5 = F &redef;
## Pattern of file mime types to calculate MD5 sums for MIME bodies.
const calc_md5_file_types = /NO_DEFAULT/ &redef;
redef record Info += {
## Optionally calculate the file's MD5 sum. Must be set prior to the
## first data chunk being seen in an event.
calc_md5: bool &default=default_calc_md5;
calc_md5: bool &default=F;
## The calculated MD5 sum for the MIME entity.
md5_hash: string &log &optional;
};
}
# Turns on MD5 calculation for the current MIME entity when its mime type
# matches the calc_md5_file_types pattern.
#
# c: The connection carrying the MIME entity; its c$mime record is updated.
# length, data: The current body chunk (not used by this handler).
event mime_segment_data(c: connection, length: count, data: string) &priority=5
	{
	# mime_type is not assigned anywhere in this script (presumably it is
	# filled in by mime/file-ident -- confirm), so it may still be unset
	# when this handler runs.  Check for its presence before matching
	# instead of reading a field that was never set.
	if ( c$mime?$mime_type && calc_md5_file_types in c$mime$mime_type )
		c$mime$calc_md5 = T;
	}
event mime_segment_data(c: connection, length: count, data: string) &priority=3
{
if ( c$mime$calc_md5 )

View file

@ -2,7 +2,7 @@ module MIME;
export {
## The default setting for finding mime types on files.
const default_find_type = F &redef;
const default_find_type = T &redef;
redef record Info += {
find_type: bool &default=default_find_type;
@ -12,7 +12,7 @@ export {
};
}
event mime_segment_data(c: connection, length: count, data: string) &priority=5
event mime_segment_data(c: connection, length: count, data: string) &priority=7
{
if ( c$mime$content_len == 0 )
{