Revert "Add extract_limit_includes_missing option for file extraction"

This reverts commit f4d0fdcd5c.
This commit is contained in:
Tim Wojtulewicz 2023-09-14 12:10:40 -07:00
parent 17347df036
commit 5934e143aa
14 changed files with 18 additions and 151 deletions

View file

@ -11,15 +11,6 @@ export {
## number of bytes). A value of zero means unlimited.
option default_limit = 0;
## This setting configures if the file extract limit is inclusive
## of missing bytes. By default, missing bytes do count towards the
## limit.
## Setting this option to false changes this behavior so that missing
## bytes no longer count towards these limits. Files with
## missing bytes are created as sparse files on disk. Their apparent size
## can exceed this file size limit.
option default_limit_includes_missing = T;
redef record Files::Info += {
## Local filename of extracted file.
extracted: string &optional &log;
@ -46,14 +37,6 @@ export {
## :zeek:see:`FileExtract::set_limit` is called to increase the
## limit. A value of zero means "no limit".
extract_limit: count &default=default_limit;
## By default, missing bytes in files count towards the extract file size.
## Missing bytes can, e.g., occur due to missed traffic, or offsets
## used when downloading files.
## Setting this option to false changes this behavior so that holes
## in files no longer count towards these limits. Files with
## holes are created as sparse files on disk. Their apparent size
## can exceed this file size limit.
extract_limit_includes_missing: bool &default=default_limit_includes_missing;
};
## Sets the maximum allowed extracted file size.

View file

@ -13,10 +13,9 @@ namespace zeek::file_analysis::detail
{
Extract::Extract(RecordValPtr args, file_analysis::File* file, const std::string& arg_filename,
uint64_t arg_limit, bool arg_limit_includes_missing)
uint64_t arg_limit)
: file_analysis::Analyzer(file_mgr->GetComponentTag("EXTRACT"), std::move(args), file),
filename(arg_filename), limit(arg_limit), written(0),
limit_includes_missing(arg_limit_includes_missing)
filename(arg_filename), limit(arg_limit), depth(0)
{
char buf[128];
file_stream = fopen(filename.data(), "wb");
@ -61,25 +60,14 @@ file_analysis::Analyzer* Extract::Instantiate(RecordValPtr args, file_analysis::
{
const auto& fname = get_extract_field_val(args, "extract_filename");
const auto& limit = get_extract_field_val(args, "extract_limit");
const auto& extract_limit_includes_missing = get_extract_field_val(
args, "extract_limit_includes_missing");
if ( ! fname || ! limit || ! extract_limit_includes_missing )
if ( ! fname || ! limit )
return nullptr;
return new Extract(std::move(args), file, fname->AsString()->CheckString(), limit->AsCount(),
extract_limit_includes_missing->AsBool());
return new Extract(std::move(args), file, fname->AsString()->CheckString(), limit->AsCount());
}
/**
* Check if we are exceeding the write limit with this write.
* @param lim size limit
* @param written how many bytes we have written so far
* @param len length of the write
* @param n number of bytes to write to keep within limit
* @returns true if limit exceeded
*/
static bool check_limit_exceeded(uint64_t lim, uint64_t written, uint64_t len, uint64_t* n)
static bool check_limit_exceeded(uint64_t lim, uint64_t depth, uint64_t len, uint64_t* n)
{
if ( lim == 0 )
{
@ -87,14 +75,14 @@ static bool check_limit_exceeded(uint64_t lim, uint64_t written, uint64_t len, u
return false;
}
if ( written >= lim )
if ( depth >= lim )
{
*n = 0;
return true;
}
else if ( written + len > lim )
else if ( depth + len > lim )
{
*n = lim - written;
*n = lim - depth;
return true;
}
else
@ -111,7 +99,7 @@ bool Extract::DeliverStream(const u_char* data, uint64_t len)
return false;
uint64_t towrite = 0;
bool limit_exceeded = check_limit_exceeded(limit, written, len, &towrite);
bool limit_exceeded = check_limit_exceeded(limit, depth, len, &towrite);
if ( limit_exceeded && file_extraction_limit )
{
@ -120,7 +108,7 @@ bool Extract::DeliverStream(const u_char* data, uint64_t len)
{f->ToVal(), GetArgs(), val_mgr->Count(limit), val_mgr->Count(len)});
// Limit may have been modified by a BIF, re-check it.
limit_exceeded = check_limit_exceeded(limit, written, len, &towrite);
limit_exceeded = check_limit_exceeded(limit, depth, len, &towrite);
}
char buf[128];
@ -136,7 +124,7 @@ bool Extract::DeliverStream(const u_char* data, uint64_t len)
return false;
}
written += towrite;
depth += towrite;
}
// Assume we may not try to write anything more for a while due to reaching
@ -157,30 +145,7 @@ bool Extract::Undelivered(uint64_t offset, uint64_t len)
if ( ! file_stream )
return false;
if ( limit_includes_missing )
{
uint64_t towrite = 0;
bool limit_exceeded = check_limit_exceeded(limit, written, len, &towrite);
// if the limit is exceeded, we have to raise the event. This gives scripts the opportunity
// to raise the limit.
if ( limit_exceeded && file_extraction_limit )
{
file_analysis::File* f = GetFile();
f->FileEvent(file_extraction_limit,
{f->ToVal(), GetArgs(), val_mgr->Count(limit), val_mgr->Count(len)});
// we have to check again if the limit is still exceeded
limit_exceeded = check_limit_exceeded(limit, written, len, &towrite);
}
// if the limit is exceeded, abort and don't do anything - no reason to seek.
if ( limit_exceeded )
return false;
// if we don't skip holes, count this hole against the write limit
written += len;
}
if ( fseek(file_stream, len + offset, SEEK_SET) != 0 )
if ( depth == offset )
{
char* tmp = new char[len]();
@ -196,7 +161,7 @@ bool Extract::Undelivered(uint64_t offset, uint64_t len)
}
delete[] tmp;
written += len;
depth += len;
}
return true;

View file

@ -65,17 +65,15 @@ protected:
* @param arg_filename a file system path which specifies the local file
* to which the contents of the file will be extracted/written.
* @param arg_limit the maximum allowed file size.
* @param arg_limit_includes_missing missing bytes count towards limit if true.
*/
Extract(RecordValPtr args, file_analysis::File* file, const std::string& arg_filename,
uint64_t arg_limit, bool arg_limit_includes_missing);
uint64_t arg_limit);
private:
std::string filename;
FILE* file_stream;
uint64_t limit; // the file extraction limit
uint64_t written; // how many bytes we have written so far
bool limit_includes_missing; // do count missing bytes against limit if true
uint64_t limit;
uint64_t depth;
};
} // namespace zeek::file_analysis::detail

View file

@ -551,7 +551,6 @@
0.000000 MetaHookPost CallFunction(Option::set_change_handler, <frame>, (FTP::max_reply_msg_length, Config::config_option_changed{ if (<skip-config-log> == Config::location) return (Config::new_value)Config::log = Config::Info($ts=network_time(), $id=Config::ID, $old_value=Config::format_value(lookup_ID(Config::ID)), $new_value=Config::format_value(Config::new_value))if ( != Config::location) Config::log$location = Config::locationLog::write(Config::LOG, to_any_coerceConfig::log)return (Config::new_value)}, -100)) -> <no result>
0.000000 MetaHookPost CallFunction(Option::set_change_handler, <frame>, (FTP::max_user_length, Config::config_option_changed{ if (<skip-config-log> == Config::location) return (Config::new_value)Config::log = Config::Info($ts=network_time(), $id=Config::ID, $old_value=Config::format_value(lookup_ID(Config::ID)), $new_value=Config::format_value(Config::new_value))if ( != Config::location) Config::log$location = Config::locationLog::write(Config::LOG, to_any_coerceConfig::log)return (Config::new_value)}, -100)) -> <no result>
0.000000 MetaHookPost CallFunction(Option::set_change_handler, <frame>, (FileExtract::default_limit, Config::config_option_changed{ if (<skip-config-log> == Config::location) return (Config::new_value)Config::log = Config::Info($ts=network_time(), $id=Config::ID, $old_value=Config::format_value(lookup_ID(Config::ID)), $new_value=Config::format_value(Config::new_value))if ( != Config::location) Config::log$location = Config::locationLog::write(Config::LOG, to_any_coerceConfig::log)return (Config::new_value)}, -100)) -> <no result>
0.000000 MetaHookPost CallFunction(Option::set_change_handler, <frame>, (FileExtract::default_limit_includes_missing, Config::config_option_changed{ if (<skip-config-log> == Config::location) return (Config::new_value)Config::log = Config::Info($ts=network_time(), $id=Config::ID, $old_value=Config::format_value(lookup_ID(Config::ID)), $new_value=Config::format_value(Config::new_value))if ( != Config::location) Config::log$location = Config::locationLog::write(Config::LOG, to_any_coerceConfig::log)return (Config::new_value)}, -100)) -> <no result>
0.000000 MetaHookPost CallFunction(Option::set_change_handler, <frame>, (Files::enable_reassembler, Config::config_option_changed{ if (<skip-config-log> == Config::location) return (Config::new_value)Config::log = Config::Info($ts=network_time(), $id=Config::ID, $old_value=Config::format_value(lookup_ID(Config::ID)), $new_value=Config::format_value(Config::new_value))if ( != Config::location) Config::log$location = Config::locationLog::write(Config::LOG, to_any_coerceConfig::log)return (Config::new_value)}, -100)) -> <no result>
0.000000 MetaHookPost CallFunction(Option::set_change_handler, <frame>, (GridFTP::max_time, Config::config_option_changed{ if (<skip-config-log> == Config::location) return (Config::new_value)Config::log = Config::Info($ts=network_time(), $id=Config::ID, $old_value=Config::format_value(lookup_ID(Config::ID)), $new_value=Config::format_value(Config::new_value))if ( != Config::location) Config::log$location = Config::locationLog::write(Config::LOG, to_any_coerceConfig::log)return (Config::new_value)}, -100)) -> <no result>
0.000000 MetaHookPost CallFunction(Option::set_change_handler, <frame>, (GridFTP::size_threshold, Config::config_option_changed{ if (<skip-config-log> == Config::location) return (Config::new_value)Config::log = Config::Info($ts=network_time(), $id=Config::ID, $old_value=Config::format_value(lookup_ID(Config::ID)), $new_value=Config::format_value(Config::new_value))if ( != Config::location) Config::log$location = Config::locationLog::write(Config::LOG, to_any_coerceConfig::log)return (Config::new_value)}, -100)) -> <no result>
@ -2179,7 +2178,6 @@
0.000000 MetaHookPre CallFunction(Option::set_change_handler, <frame>, (FTP::max_reply_msg_length, Config::config_option_changed{ if (<skip-config-log> == Config::location) return (Config::new_value)Config::log = Config::Info($ts=network_time(), $id=Config::ID, $old_value=Config::format_value(lookup_ID(Config::ID)), $new_value=Config::format_value(Config::new_value))if ( != Config::location) Config::log$location = Config::locationLog::write(Config::LOG, to_any_coerceConfig::log)return (Config::new_value)}, -100))
0.000000 MetaHookPre CallFunction(Option::set_change_handler, <frame>, (FTP::max_user_length, Config::config_option_changed{ if (<skip-config-log> == Config::location) return (Config::new_value)Config::log = Config::Info($ts=network_time(), $id=Config::ID, $old_value=Config::format_value(lookup_ID(Config::ID)), $new_value=Config::format_value(Config::new_value))if ( != Config::location) Config::log$location = Config::locationLog::write(Config::LOG, to_any_coerceConfig::log)return (Config::new_value)}, -100))
0.000000 MetaHookPre CallFunction(Option::set_change_handler, <frame>, (FileExtract::default_limit, Config::config_option_changed{ if (<skip-config-log> == Config::location) return (Config::new_value)Config::log = Config::Info($ts=network_time(), $id=Config::ID, $old_value=Config::format_value(lookup_ID(Config::ID)), $new_value=Config::format_value(Config::new_value))if ( != Config::location) Config::log$location = Config::locationLog::write(Config::LOG, to_any_coerceConfig::log)return (Config::new_value)}, -100))
0.000000 MetaHookPre CallFunction(Option::set_change_handler, <frame>, (FileExtract::default_limit_includes_missing, Config::config_option_changed{ if (<skip-config-log> == Config::location) return (Config::new_value)Config::log = Config::Info($ts=network_time(), $id=Config::ID, $old_value=Config::format_value(lookup_ID(Config::ID)), $new_value=Config::format_value(Config::new_value))if ( != Config::location) Config::log$location = Config::locationLog::write(Config::LOG, to_any_coerceConfig::log)return (Config::new_value)}, -100))
0.000000 MetaHookPre CallFunction(Option::set_change_handler, <frame>, (Files::enable_reassembler, Config::config_option_changed{ if (<skip-config-log> == Config::location) return (Config::new_value)Config::log = Config::Info($ts=network_time(), $id=Config::ID, $old_value=Config::format_value(lookup_ID(Config::ID)), $new_value=Config::format_value(Config::new_value))if ( != Config::location) Config::log$location = Config::locationLog::write(Config::LOG, to_any_coerceConfig::log)return (Config::new_value)}, -100))
0.000000 MetaHookPre CallFunction(Option::set_change_handler, <frame>, (GridFTP::max_time, Config::config_option_changed{ if (<skip-config-log> == Config::location) return (Config::new_value)Config::log = Config::Info($ts=network_time(), $id=Config::ID, $old_value=Config::format_value(lookup_ID(Config::ID)), $new_value=Config::format_value(Config::new_value))if ( != Config::location) Config::log$location = Config::locationLog::write(Config::LOG, to_any_coerceConfig::log)return (Config::new_value)}, -100))
0.000000 MetaHookPre CallFunction(Option::set_change_handler, <frame>, (GridFTP::size_threshold, Config::config_option_changed{ if (<skip-config-log> == Config::location) return (Config::new_value)Config::log = Config::Info($ts=network_time(), $id=Config::ID, $old_value=Config::format_value(lookup_ID(Config::ID)), $new_value=Config::format_value(Config::new_value))if ( != Config::location) Config::log$location = Config::locationLog::write(Config::LOG, to_any_coerceConfig::log)return (Config::new_value)}, -100))
@ -3806,7 +3804,6 @@
0.000000 | HookCallFunction Option::set_change_handler(FTP::max_reply_msg_length, Config::config_option_changed{ if (<skip-config-log> == Config::location) return (Config::new_value)Config::log = Config::Info($ts=network_time(), $id=Config::ID, $old_value=Config::format_value(lookup_ID(Config::ID)), $new_value=Config::format_value(Config::new_value))if ( != Config::location) Config::log$location = Config::locationLog::write(Config::LOG, to_any_coerceConfig::log)return (Config::new_value)}, -100)
0.000000 | HookCallFunction Option::set_change_handler(FTP::max_user_length, Config::config_option_changed{ if (<skip-config-log> == Config::location) return (Config::new_value)Config::log = Config::Info($ts=network_time(), $id=Config::ID, $old_value=Config::format_value(lookup_ID(Config::ID)), $new_value=Config::format_value(Config::new_value))if ( != Config::location) Config::log$location = Config::locationLog::write(Config::LOG, to_any_coerceConfig::log)return (Config::new_value)}, -100)
0.000000 | HookCallFunction Option::set_change_handler(FileExtract::default_limit, Config::config_option_changed{ if (<skip-config-log> == Config::location) return (Config::new_value)Config::log = Config::Info($ts=network_time(), $id=Config::ID, $old_value=Config::format_value(lookup_ID(Config::ID)), $new_value=Config::format_value(Config::new_value))if ( != Config::location) Config::log$location = Config::locationLog::write(Config::LOG, to_any_coerceConfig::log)return (Config::new_value)}, -100)
0.000000 | HookCallFunction Option::set_change_handler(FileExtract::default_limit_includes_missing, Config::config_option_changed{ if (<skip-config-log> == Config::location) return (Config::new_value)Config::log = Config::Info($ts=network_time(), $id=Config::ID, $old_value=Config::format_value(lookup_ID(Config::ID)), $new_value=Config::format_value(Config::new_value))if ( != Config::location) Config::log$location = Config::locationLog::write(Config::LOG, to_any_coerceConfig::log)return (Config::new_value)}, -100)
0.000000 | HookCallFunction Option::set_change_handler(Files::enable_reassembler, Config::config_option_changed{ if (<skip-config-log> == Config::location) return (Config::new_value)Config::log = Config::Info($ts=network_time(), $id=Config::ID, $old_value=Config::format_value(lookup_ID(Config::ID)), $new_value=Config::format_value(Config::new_value))if ( != Config::location) Config::log$location = Config::locationLog::write(Config::LOG, to_any_coerceConfig::log)return (Config::new_value)}, -100)
0.000000 | HookCallFunction Option::set_change_handler(GridFTP::max_time, Config::config_option_changed{ if (<skip-config-log> == Config::location) return (Config::new_value)Config::log = Config::Info($ts=network_time(), $id=Config::ID, $old_value=Config::format_value(lookup_ID(Config::ID)), $new_value=Config::format_value(Config::new_value))if ( != Config::location) Config::log$location = Config::locationLog::write(Config::LOG, to_any_coerceConfig::log)return (Config::new_value)}, -100)
0.000000 | HookCallFunction Option::set_change_handler(GridFTP::size_threshold, Config::config_option_changed{ if (<skip-config-log> == Config::location) return (Config::new_value)Config::log = Config::Info($ts=network_time(), $id=Config::ID, $old_value=Config::format_value(lookup_ID(Config::ID)), $new_value=Config::format_value(Config::new_value))if ( != Config::location) Config::log$location = Config::locationLog::write(Config::LOG, to_any_coerceConfig::log)return (Config::new_value)}, -100)

View file

@ -1,2 +0,0 @@
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
file_extraction_limit, 10, 2147483648

View file

@ -1 +0,0 @@
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.

View file

@ -1,2 +0,0 @@
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
file_extraction_limit, 1, 2

View file

@ -1,11 +0,0 @@
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
#separator \x09
#set_separator ,
#empty_field (empty)
#unset_field -
#path files
#open XXXX-XX-XX-XX-XX-XX
#fields ts fuid uid id.orig_h id.orig_p id.resp_h id.resp_p source depth analyzers mime_type filename duration local_orig is_orig seen_bytes total_bytes missing_bytes overflow_bytes timedout parent_fuid extracted extracted_cutoff extracted_size
#types time string string addr port addr port string count set[string] string string interval bool bool count count count count bool string string bool count
XXXXXXXXXX.XXXXXX Fg5gNDmaUhHwqjbp8 CHhAvVGS1DHFjwGM9 192.168.65.2 53720 91.189.91.123 80 HTTP 0 EXTRACT - - 0.000000 F F 2 5037662208 2147483648 0 T - 1 T 10
#close XXXX-XX-XX-XX-XX-XX

View file

@ -1,11 +0,0 @@
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
#separator \x09
#set_separator ,
#empty_field (empty)
#unset_field -
#path files
#open XXXX-XX-XX-XX-XX-XX
#fields ts fuid uid id.orig_h id.orig_p id.resp_h id.resp_p source depth analyzers mime_type filename duration local_orig is_orig seen_bytes total_bytes missing_bytes overflow_bytes timedout parent_fuid extracted extracted_cutoff extracted_size
#types time string string addr port addr port string count set[string] string string interval bool bool count count count count bool string string bool count
XXXXXXXXXX.XXXXXX Fg5gNDmaUhHwqjbp8 CHhAvVGS1DHFjwGM9 192.168.65.2 53720 91.189.91.123 80 HTTP 0 EXTRACT - - 0.000000 F F 2 5037662208 2147483648 0 T - 2 F -
#close XXXX-XX-XX-XX-XX-XX

View file

@ -1,11 +0,0 @@
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
#separator \x09
#set_separator ,
#empty_field (empty)
#unset_field -
#path files
#open XXXX-XX-XX-XX-XX-XX
#fields ts fuid uid id.orig_h id.orig_p id.resp_h id.resp_p source depth analyzers mime_type filename duration local_orig is_orig seen_bytes total_bytes missing_bytes overflow_bytes timedout parent_fuid extracted extracted_cutoff extracted_size
#types time string string addr port addr port string count set[string] string string interval bool bool count count count count bool string string bool count
XXXXXXXXXX.XXXXXX Fg5gNDmaUhHwqjbp8 CHhAvVGS1DHFjwGM9 192.168.65.2 53720 91.189.91.123 80 HTTP 0 EXTRACT - - 0.000000 F F 2 5037662208 2147483648 0 T - 3 T 1
#close XXXX-XX-XX-XX-XX-XX

View file

@ -1,7 +1,7 @@
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
data3, FyjjRu4ARLzpsPLhNh,
data3, Fz3QLf4Bn4qaQwyUdk,
depth warning, FyjjRu4ARLzpsPLhNh, [chunk_event=<uninitialized>, stream_event=<uninitialized>, extract_filename=<uninitialized>, extract_limit=0, extract_limit_includes_missing=T], 2
depth warning, Fz3QLf4Bn4qaQwyUdk, [chunk_event=<uninitialized>, stream_event=<uninitialized>, extract_filename=<uninitialized>, extract_limit=0, extract_limit_includes_missing=T], 2
depth warning, FyjjRu4ARLzpsPLhNh, [chunk_event=<uninitialized>, stream_event=<uninitialized>, extract_filename=<uninitialized>, extract_limit=0], 2
depth warning, Fz3QLf4Bn4qaQwyUdk, [chunk_event=<uninitialized>, stream_event=<uninitialized>, extract_filename=<uninitialized>, extract_limit=0], 2
data2, F2Qpmk14ATv4vFSEsi, from 1:hello world
data1, FcRmxz1fPbKQEgGGUi, hello world

View file

@ -1,38 +0,0 @@
# @TEST-EXEC: zeek -C -b -r $TRACES/http/http-large-gap.pcap %INPUT efname=1 FileExtract::default_limit_includes_missing=T
# @TEST-EXEC: btest-diff --binary extract_files/1
# @TEST-EXEC: btest-diff 1.out
# @TEST-EXEC: mv files.log files-1.log
# @TEST-EXEC: btest-diff files-1.log
# @TEST-EXEC: zeek -C -b -r $TRACES/http/http-large-gap.pcap %INPUT efname=2 FileExtract::default_limit_includes_missing=F
# @TEST-EXEC: rm extract_files/2
# @TEST-EXEC: btest-diff 2.out
# @TEST-EXEC: mv files.log files-2.log
# @TEST-EXEC: btest-diff files-2.log
# @TEST-EXEC: zeek -C -b -r $TRACES/http/http-large-gap.pcap %INPUT efname=3 FileExtract::default_limit_includes_missing=F max_extract=1
# @TEST-EXEC: rm extract_files/3
# @TEST-EXEC: btest-diff 3.out
# @TEST-EXEC: mv files.log files-3.log
# @TEST-EXEC: btest-diff files-3.log
@load base/files/extract
@load base/protocols/http
# Output file handle; opened in zeek_init() and written to by the
# file_extraction_limit handler.
global outfile: file;
# Extraction limit passed as $extract_limit when the extract analyzer is
# attached. Redef-able so a test run can override it on the command line.
const max_extract: count = 10 &redef;
# Name passed as $extract_filename and also used as the stem of the
# "<efname>.out" output file. Redef-able so each test run picks its own name.
const efname: string = "0" &redef;
# Attach the extraction analyzer to every new file, using efname as the
# extraction filename and max_extract as the extraction limit.
event file_new(f: fa_file)
	{
	local extract_args = Files::AnalyzerArgs($extract_filename=efname,
	                                         $extract_limit=max_extract);
	Files::add_analyzer(f, Files::ANALYZER_EXTRACT, extract_args);
	}
# Write one line per file_extraction_limit event to the run's output file,
# carrying the event's limit and len values; the test directives diff that
# output file against a baseline.
event file_extraction_limit(f: fa_file, args: any, limit: count, len: count)
{
print outfile, "file_extraction_limit", limit, len;
}
# Open the per-run output file "<efname>.out" at startup so later event
# handlers can print to it.
event zeek_init()
	{
	local out_path = fmt("%s.out", efname);
	outfile = open(out_path);
	}