diff --git a/scripts/base/files/extract/main.zeek b/scripts/base/files/extract/main.zeek index 93288c5127..211b65536e 100644 --- a/scripts/base/files/extract/main.zeek +++ b/scripts/base/files/extract/main.zeek @@ -9,6 +9,10 @@ export { ## The default max size for extracted files (they won't exceed this ## number of bytes). A value of zero means unlimited. + ## + ## Note: Holes in files do not count towards these limits. Files with + ## holes are created as sparse files on disk. This means that their + ## apparent size can exceed this limit. option default_limit = 0; redef record Files::Info += { diff --git a/src/file_analysis/analyzer/extract/Extract.cc b/src/file_analysis/analyzer/extract/Extract.cc index e6805b867a..284683f51e 100644 --- a/src/file_analysis/analyzer/extract/Extract.cc +++ b/src/file_analysis/analyzer/extract/Extract.cc @@ -15,7 +15,7 @@ namespace zeek::file_analysis::detail Extract::Extract(RecordValPtr args, file_analysis::File* file, const std::string& arg_filename, uint64_t arg_limit) : file_analysis::Analyzer(file_mgr->GetComponentTag("EXTRACT"), std::move(args), file), - filename(arg_filename), limit(arg_limit), depth(0) + filename(arg_filename), limit(arg_limit), written(0) { char buf[128]; file_stream = fopen(filename.data(), "wb"); @@ -67,7 +67,15 @@ file_analysis::Analyzer* Extract::Instantiate(RecordValPtr args, file_analysis:: return new Extract(std::move(args), file, fname->AsString()->CheckString(), limit->AsCount()); } -static bool check_limit_exceeded(uint64_t lim, uint64_t depth, uint64_t len, uint64_t* n) +/** + * Check if we are exceeding the write limit with this write. 
+ * @param lim size limit in bytes; zero means unlimited + * @param written how many bytes we have written so far + * @param len length of the write in bytes + * @param n output: number of bytes that may be written while staying within the limit + * @returns true if this write would exceed the limit + */ +static bool check_limit_exceeded(uint64_t lim, uint64_t written, uint64_t len, uint64_t* n) { if ( lim == 0 ) { @@ -75,14 +83,14 @@ static bool check_limit_exceeded(uint64_t lim, uint64_t depth, uint64_t len, uin return false; } - if ( depth >= lim ) + if ( written >= lim ) { *n = 0; return true; } - else if ( depth + len > lim ) + else if ( written + len > lim ) { - *n = lim - depth; + *n = lim - written; return true; } else @@ -99,7 +107,7 @@ bool Extract::DeliverStream(const u_char* data, uint64_t len) return false; uint64_t towrite = 0; - bool limit_exceeded = check_limit_exceeded(limit, depth, len, &towrite); + bool limit_exceeded = check_limit_exceeded(limit, written, len, &towrite); if ( limit_exceeded && file_extraction_limit ) { @@ -108,7 +116,7 @@ bool Extract::DeliverStream(const u_char* data, uint64_t len) {f->ToVal(), GetArgs(), val_mgr->Count(limit), val_mgr->Count(len)}); // Limit may have been modified by a BIF, re-check it. - limit_exceeded = check_limit_exceeded(limit, depth, len, &towrite); + limit_exceeded = check_limit_exceeded(limit, written, len, &towrite); } char buf[128]; @@ -124,7 +132,7 @@ bool Extract::DeliverStream(const u_char* data, uint64_t len) return false; } - depth += towrite; + written += towrite; } // Assume we may not try to write anything more for a while due to reaching @@ -145,23 +153,14 @@ bool Extract::Undelivered(uint64_t offset, uint64_t len) if ( ! 
file_stream ) return false; - if ( depth == offset ) + if ( fseek(file_stream, len + offset, SEEK_SET) != 0 ) { - char* tmp = new char[len](); - - if ( fwrite(tmp, len, 1, file_stream) != 1 ) - { - char buf[128]; - util::zeek_strerror_r(errno, buf, sizeof(buf)); - reporter->Error("failed to write to extracted file %s: %s", filename.data(), buf); - fclose(file_stream); - file_stream = nullptr; - delete[] tmp; - return false; - } - - delete[] tmp; - depth += len; + char buf[128]; + util::zeek_strerror_r(errno, buf, sizeof(buf)); + reporter->Error("failed to seek in extracted file %s: %s", filename.data(), buf); + fclose(file_stream); + file_stream = nullptr; + return false; } return true; diff --git a/src/file_analysis/analyzer/extract/Extract.h b/src/file_analysis/analyzer/extract/Extract.h index ffe1a69e19..96a18c49f1 100644 --- a/src/file_analysis/analyzer/extract/Extract.h +++ b/src/file_analysis/analyzer/extract/Extract.h @@ -72,8 +72,8 @@ protected: private: std::string filename; FILE* file_stream; - uint64_t limit; - uint64_t depth; + uint64_t limit; // the file extraction limit + uint64_t written; // how many bytes we have written so far }; } // namespace zeek::file_analysis::detail