File extraction: use fseek

Previously, we allocated a zero-filled buffer and wrote it with
fwrite. Now we simply fseek to the correct offset instead.

This slightly changes how the file extraction limit is counted:
skipped bytes no longer count against the file size limit.

(cherry picked from commit 5071592e9b7105090a1d9de19689c499070749d4)
This commit is contained in:
Johanna Amann 2023-09-06 10:08:27 +01:00 committed by Tim Wojtulewicz
parent 5934e143aa
commit 9928f7efb7
3 changed files with 29 additions and 26 deletions

View file

@ -9,6 +9,10 @@ export {
## The default max size for extracted files (they won't exceed this ## The default max size for extracted files (they won't exceed this
## number of bytes). A value of zero means unlimited. ## number of bytes). A value of zero means unlimited.
##
## Note: Holes in files do not count towards this limit. Files with
## holes are created as sparse files on disk. This means that their
## apparent size can exceed this limit.
option default_limit = 0; option default_limit = 0;
redef record Files::Info += { redef record Files::Info += {

View file

@ -15,7 +15,7 @@ namespace zeek::file_analysis::detail
Extract::Extract(RecordValPtr args, file_analysis::File* file, const std::string& arg_filename, Extract::Extract(RecordValPtr args, file_analysis::File* file, const std::string& arg_filename,
uint64_t arg_limit) uint64_t arg_limit)
: file_analysis::Analyzer(file_mgr->GetComponentTag("EXTRACT"), std::move(args), file), : file_analysis::Analyzer(file_mgr->GetComponentTag("EXTRACT"), std::move(args), file),
filename(arg_filename), limit(arg_limit), depth(0) filename(arg_filename), limit(arg_limit), written(0)
{ {
char buf[128]; char buf[128];
file_stream = fopen(filename.data(), "wb"); file_stream = fopen(filename.data(), "wb");
@ -67,7 +67,15 @@ file_analysis::Analyzer* Extract::Instantiate(RecordValPtr args, file_analysis::
return new Extract(std::move(args), file, fname->AsString()->CheckString(), limit->AsCount()); return new Extract(std::move(args), file, fname->AsString()->CheckString(), limit->AsCount());
} }
static bool check_limit_exceeded(uint64_t lim, uint64_t depth, uint64_t len, uint64_t* n) /**
* Check if we are exceeding the write limit with this write.
* @param lim size limit
* @param written how many bytes we have written so far
* @param len length of the write
* @param n number of bytes to write to keep within limit
* @returns true if limit exceeded
*/
static bool check_limit_exceeded(uint64_t lim, uint64_t written, uint64_t len, uint64_t* n)
{ {
if ( lim == 0 ) if ( lim == 0 )
{ {
@ -75,14 +83,14 @@ static bool check_limit_exceeded(uint64_t lim, uint64_t depth, uint64_t len, uin
return false; return false;
} }
if ( depth >= lim ) if ( written >= lim )
{ {
*n = 0; *n = 0;
return true; return true;
} }
else if ( depth + len > lim ) else if ( written + len > lim )
{ {
*n = lim - depth; *n = lim - written;
return true; return true;
} }
else else
@ -99,7 +107,7 @@ bool Extract::DeliverStream(const u_char* data, uint64_t len)
return false; return false;
uint64_t towrite = 0; uint64_t towrite = 0;
bool limit_exceeded = check_limit_exceeded(limit, depth, len, &towrite); bool limit_exceeded = check_limit_exceeded(limit, written, len, &towrite);
if ( limit_exceeded && file_extraction_limit ) if ( limit_exceeded && file_extraction_limit )
{ {
@ -108,7 +116,7 @@ bool Extract::DeliverStream(const u_char* data, uint64_t len)
{f->ToVal(), GetArgs(), val_mgr->Count(limit), val_mgr->Count(len)}); {f->ToVal(), GetArgs(), val_mgr->Count(limit), val_mgr->Count(len)});
// Limit may have been modified by a BIF, re-check it. // Limit may have been modified by a BIF, re-check it.
limit_exceeded = check_limit_exceeded(limit, depth, len, &towrite); limit_exceeded = check_limit_exceeded(limit, written, len, &towrite);
} }
char buf[128]; char buf[128];
@ -124,7 +132,7 @@ bool Extract::DeliverStream(const u_char* data, uint64_t len)
return false; return false;
} }
depth += towrite; written += towrite;
} }
// Assume we may not try to write anything more for a while due to reaching // Assume we may not try to write anything more for a while due to reaching
@ -145,23 +153,14 @@ bool Extract::Undelivered(uint64_t offset, uint64_t len)
if ( ! file_stream ) if ( ! file_stream )
return false; return false;
if ( depth == offset ) if ( fseek(file_stream, len + offset, SEEK_SET) != 0 )
{ {
char* tmp = new char[len](); char buf[128];
util::zeek_strerror_r(errno, buf, sizeof(buf));
if ( fwrite(tmp, len, 1, file_stream) != 1 ) reporter->Error("failed to seek in extracted file %s: %s", filename.data(), buf);
{ fclose(file_stream);
char buf[128]; file_stream = nullptr;
util::zeek_strerror_r(errno, buf, sizeof(buf)); return false;
reporter->Error("failed to write to extracted file %s: %s", filename.data(), buf);
fclose(file_stream);
file_stream = nullptr;
delete[] tmp;
return false;
}
delete[] tmp;
depth += len;
} }
return true; return true;

View file

@ -72,8 +72,8 @@ protected:
private: private:
std::string filename; std::string filename;
FILE* file_stream; FILE* file_stream;
uint64_t limit; uint64_t limit; // the file extraction limit
uint64_t depth; uint64_t written; // how many bytes we have written so far
}; };
} // namespace zeek::file_analysis::detail } // namespace zeek::file_analysis::detail