File extraction: use fseek

In the past, we allocated a zero-filled buffer and wrote it with
fwrite. Now we just fseek to the correct offset instead.

This changes the way in which the file extract limit is counted a bit;
skipped bytes no longer count against the file size limit.

(cherry picked from commit 5071592e9b7105090a1d9de19689c499070749d4)
This commit is contained in:
Johanna Amann 2023-09-06 10:08:27 +01:00 committed by Tim Wojtulewicz
parent 5934e143aa
commit 9928f7efb7
3 changed files with 29 additions and 26 deletions

View file

@ -9,6 +9,10 @@ export {
## The default max size for extracted files (they won't exceed this
## number of bytes). A value of zero means unlimited.
##
## Note: Holes in files do not count towards these limits. Files with
## holes are created as sparse files on disk. This means that their
## apparent size can exceed this limit.
option default_limit = 0;
redef record Files::Info += {

View file

@ -15,7 +15,7 @@ namespace zeek::file_analysis::detail
Extract::Extract(RecordValPtr args, file_analysis::File* file, const std::string& arg_filename,
uint64_t arg_limit)
: file_analysis::Analyzer(file_mgr->GetComponentTag("EXTRACT"), std::move(args), file),
filename(arg_filename), limit(arg_limit), depth(0)
filename(arg_filename), limit(arg_limit), written(0)
{
char buf[128];
file_stream = fopen(filename.data(), "wb");
@ -67,7 +67,15 @@ file_analysis::Analyzer* Extract::Instantiate(RecordValPtr args, file_analysis::
return new Extract(std::move(args), file, fname->AsString()->CheckString(), limit->AsCount());
}
static bool check_limit_exceeded(uint64_t lim, uint64_t depth, uint64_t len, uint64_t* n)
/**
* Check if we are exceeding the write limit with this write.
* @param lim size limit
* @param written how many bytes we have written so far
* @param len length of the write
* @param n number of bytes to write to keep within limit
* @returns true if limit exceeded
*/
static bool check_limit_exceeded(uint64_t lim, uint64_t written, uint64_t len, uint64_t* n)
{
if ( lim == 0 )
{
@ -75,14 +83,14 @@ static bool check_limit_exceeded(uint64_t lim, uint64_t depth, uint64_t len, uin
return false;
}
if ( depth >= lim )
if ( written >= lim )
{
*n = 0;
return true;
}
else if ( depth + len > lim )
else if ( written + len > lim )
{
*n = lim - depth;
*n = lim - written;
return true;
}
else
@ -99,7 +107,7 @@ bool Extract::DeliverStream(const u_char* data, uint64_t len)
return false;
uint64_t towrite = 0;
bool limit_exceeded = check_limit_exceeded(limit, depth, len, &towrite);
bool limit_exceeded = check_limit_exceeded(limit, written, len, &towrite);
if ( limit_exceeded && file_extraction_limit )
{
@ -108,7 +116,7 @@ bool Extract::DeliverStream(const u_char* data, uint64_t len)
{f->ToVal(), GetArgs(), val_mgr->Count(limit), val_mgr->Count(len)});
// Limit may have been modified by a BIF, re-check it.
limit_exceeded = check_limit_exceeded(limit, depth, len, &towrite);
limit_exceeded = check_limit_exceeded(limit, written, len, &towrite);
}
char buf[128];
@ -124,7 +132,7 @@ bool Extract::DeliverStream(const u_char* data, uint64_t len)
return false;
}
depth += towrite;
written += towrite;
}
// Assume we may not try to write anything more for a while due to reaching
@ -145,25 +153,16 @@ bool Extract::Undelivered(uint64_t offset, uint64_t len)
if ( ! file_stream )
return false;
if ( depth == offset )
{
char* tmp = new char[len]();
if ( fwrite(tmp, len, 1, file_stream) != 1 )
if ( fseek(file_stream, len + offset, SEEK_SET) != 0 )
{
char buf[128];
util::zeek_strerror_r(errno, buf, sizeof(buf));
reporter->Error("failed to write to extracted file %s: %s", filename.data(), buf);
reporter->Error("failed to seek in extracted file %s: %s", filename.data(), buf);
fclose(file_stream);
file_stream = nullptr;
delete[] tmp;
return false;
}
delete[] tmp;
depth += len;
}
return true;
}

View file

@ -72,8 +72,8 @@ protected:
private:
std::string filename;
FILE* file_stream;
uint64_t limit;
uint64_t depth;
uint64_t limit; // the file extraction limit
uint64_t written; // how many bytes we have written so far
};
} // namespace zeek::file_analysis::detail