mirror of
https://github.com/zeek/zeek.git
synced 2025-10-02 06:38:20 +00:00
Add extract_limit_includes_missing option for file extraction
Setting this option to false does not count missing bytes in files towards the extraction limits, and allows to extract data up to the desired limit, even when partial files are written. When missing bytes are encountered, files are now written as sparse files. Using this option requires the underlying storage and utilities to support sparse files.
This commit is contained in:
parent
9928f7efb7
commit
e18edfa452
14 changed files with 132 additions and 11 deletions
|
@ -13,9 +13,10 @@ namespace zeek::file_analysis::detail
|
|||
{
|
||||
|
||||
Extract::Extract(RecordValPtr args, file_analysis::File* file, const std::string& arg_filename,
|
||||
uint64_t arg_limit)
|
||||
uint64_t arg_limit, bool arg_limit_includes_missing)
|
||||
: file_analysis::Analyzer(file_mgr->GetComponentTag("EXTRACT"), std::move(args), file),
|
||||
filename(arg_filename), limit(arg_limit), written(0)
|
||||
filename(arg_filename), limit(arg_limit), written(0),
|
||||
limit_includes_missing(arg_limit_includes_missing)
|
||||
{
|
||||
char buf[128];
|
||||
file_stream = fopen(filename.data(), "wb");
|
||||
|
@ -60,11 +61,14 @@ file_analysis::Analyzer* Extract::Instantiate(RecordValPtr args, file_analysis::
|
|||
{
|
||||
const auto& fname = get_extract_field_val(args, "extract_filename");
|
||||
const auto& limit = get_extract_field_val(args, "extract_limit");
|
||||
const auto& extract_limit_includes_missing = get_extract_field_val(
|
||||
args, "extract_limit_includes_missing");
|
||||
|
||||
if ( ! fname || ! limit )
|
||||
if ( ! fname || ! limit || ! extract_limit_includes_missing )
|
||||
return nullptr;
|
||||
|
||||
return new Extract(std::move(args), file, fname->AsString()->CheckString(), limit->AsCount());
|
||||
return new Extract(std::move(args), file, fname->AsString()->CheckString(), limit->AsCount(),
|
||||
extract_limit_includes_missing->AsBool());
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -153,6 +157,29 @@ bool Extract::Undelivered(uint64_t offset, uint64_t len)
|
|||
if ( ! file_stream )
|
||||
return false;
|
||||
|
||||
if ( limit_includes_missing )
|
||||
{
|
||||
uint64_t towrite = 0;
|
||||
bool limit_exceeded = check_limit_exceeded(limit, written, len, &towrite);
|
||||
// if the limit is exceeded, we have to raise the event. This gives scripts the opportunity
|
||||
// to raise the limit.
|
||||
if ( limit_exceeded && file_extraction_limit )
|
||||
{
|
||||
file_analysis::File* f = GetFile();
|
||||
f->FileEvent(file_extraction_limit,
|
||||
{f->ToVal(), GetArgs(), val_mgr->Count(limit), val_mgr->Count(len)});
|
||||
// we have to check again if the limit is still exceedee
|
||||
limit_exceeded = check_limit_exceeded(limit, written, len, &towrite);
|
||||
}
|
||||
|
||||
// if the limit is exceeded, abort and don't do anything - no reason to seek.
|
||||
if ( limit_exceeded )
|
||||
return false;
|
||||
|
||||
// if we don't skip holes, count this hole against the write limit
|
||||
written += len;
|
||||
}
|
||||
|
||||
if ( fseek(file_stream, len + offset, SEEK_SET) != 0 )
|
||||
{
|
||||
char buf[128];
|
||||
|
|
|
@ -65,15 +65,17 @@ protected:
|
|||
* @param arg_filename a file system path which specifies the local file
|
||||
* to which the contents of the file will be extracted/written.
|
||||
* @param arg_limit the maximum allowed file size.
|
||||
* @param arg_limit_includes_missing missing bytes count towards limit if true.
|
||||
*/
|
||||
Extract(RecordValPtr args, file_analysis::File* file, const std::string& arg_filename,
|
||||
uint64_t arg_limit);
|
||||
uint64_t arg_limit, bool arg_limit_includes_missing);
|
||||
|
||||
private:
|
||||
std::string filename;
|
||||
FILE* file_stream;
|
||||
uint64_t limit; // the file extraction limit
|
||||
uint64_t written; // how many bytes we have written so far
|
||||
bool limit_includes_missing; // do count missing bytes against limit if true
|
||||
};
|
||||
|
||||
} // namespace zeek::file_analysis::detail
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue