Merge remote-tracking branch 'origin/topic/johanna/gh-375-remove-brofile-cache'

* origin/topic/johanna/gh-375-remove-brofile-cache:
  Remove the BroFile cache

Fixes GH-375
This commit is contained in:
Jon Siwek 2019-06-27 11:50:17 -07:00
commit dafc44e8b9
18 changed files with 62 additions and 321 deletions

View file

@ -1,4 +1,8 @@
2.6-531 | 2019-06-27 12:09:08 -0700
* GH-375: Remove the BroFile cache (Johanna Amann, Corelight)
2.6-529 | 2019-06-27 10:12:34 -0700
* Fix creating a StringVal from std::string. (Johanna Amann, Corelight)

7
NEWS
View file

@ -467,6 +467,13 @@ Removed Functionality
upgrade path. The ``OS_version_found`` event as well as the
``generate_OS_version_event`` configuration option were removed.
- Removed the ``max_files_in_cache`` option and the associated
"file caching" feature it's associated with. That feature allowed
one to open many scripting-layer ``file`` objects and potentially
bypass the operating system's resource limits for open files.
This is typically not necessary and it's a problem that is more
appropriately addressed at the system configuration level.
Deprecated Functionality
------------------------

View file

@ -1 +1 @@
2.6-529
2.6-531

2
doc

@ -1 +1 @@
Subproject commit a840cea13e7c21951079422d6ec8971fb6812b06
Subproject commit 8dad64e4a1b48b8b516690e94ae83e81f9efdfde

View file

@ -4622,11 +4622,6 @@ module GLOBAL;
## BPF filter the user has set via the -f command line options. Empty if none.
const cmd_line_bpf_filter = "" &redef;
## The maximum number of open files to keep cached at a given time.
## If set to zero, this is automatically determined by inspecting
## the current/maximum limit on open files for the process.
const max_files_in_cache = 0 &redef;
## Deprecated.
const log_rotate_base_time = "0:00" &redef;

View file

@ -28,16 +28,10 @@
#include "Event.h"
#include "Reporter.h"
// The following could in principle be part of a "file manager" object.
std::list<std::pair<std::string, BroFile*>> BroFile::open_files;
#define MAX_FILE_CACHE_SIZE 512
static int num_files_in_cache = 0;
static BroFile* head = 0;
static BroFile* tail = 0;
// Maximizes the number of open file descriptors and returns the number
// that we should use for the cache.
static int maximize_num_fds()
// Maximizes the number of open file descriptors.
static void maximize_num_fds()
{
struct rlimit rl;
if ( getrlimit(RLIMIT_NOFILE, &rl) < 0 )
@ -46,11 +40,7 @@ static int maximize_num_fds()
if ( rl.rlim_max == RLIM_INFINITY )
{
// Don't try raising the current limit.
if ( rl.rlim_cur == RLIM_INFINITY )
// Let's not be too ambitious.
return MAX_FILE_CACHE_SIZE;
else
return rl.rlim_cur / 2;
return;
}
// See if we can raise the current to the maximum.
@ -58,11 +48,8 @@ static int maximize_num_fds()
if ( setrlimit(RLIMIT_NOFILE, &rl) < 0 )
reporter->FatalError("maximize_num_fds(): setrlimit failed");
return rl.rlim_cur / 2;
}
BroFile::BroFile(FILE* arg_f)
{
Init();
@ -104,7 +91,6 @@ BroFile::BroFile(const char* arg_name, const char* arg_access, BroType* arg_t)
{
reporter->Error("cannot open %s: %s", name, strerror(errno));
is_open = 0;
okay_to_manage = 0;
}
}
@ -127,14 +113,15 @@ const char* BroFile::Name() const
bool BroFile::Open(FILE* file, const char* mode)
{
static bool fds_maximized = false;
open_time = network_time ? network_time : current_time();
if ( ! max_files_in_cache )
if ( ! fds_maximized )
{
// Haven't initialized yet.
max_files_in_cache = maximize_num_fds();
if ( num_files_in_cache >= max_files_in_cache )
PurgeCache();
maximize_num_fds();
fds_maximized = true;
}
f = file;
@ -148,22 +135,15 @@ bool BroFile::Open(FILE* file, const char* mode)
SetBuf(buffered);
if ( f )
if ( ! f )
{
// These are the only files we manage, because we open them
// ourselves and hence don't have any surprises regarding
// whether we're allowed to close them.
is_open = okay_to_manage = 1;
InsertAtBeginning();
}
else
{
// No point managing it.
is_open = okay_to_manage = 0;
is_open = 0;
return false;
}
is_open = 1;
open_files.emplace_back(std::make_pair(name, this));
RaiseOpenEvent();
return true;
@ -185,9 +165,7 @@ BroFile::~BroFile()
void BroFile::Init()
{
is_open = okay_to_manage = is_in_cache = 0;
position = 0;
next = prev = 0;
is_open = 0;
attrs = 0;
buffered = true;
print_hook = true;
@ -201,56 +179,6 @@ void BroFile::Init()
FILE* BroFile::File()
{
if ( okay_to_manage && ! is_in_cache )
f = BringIntoCache();
return f;
}
FILE* BroFile::BringIntoCache()
{
char buf[256];
if ( f )
reporter->InternalError("BroFile non-nil non-open file");
if ( num_files_in_cache >= max_files_in_cache )
PurgeCache();
if ( position == 0 )
// Need to truncate it.
f = fopen(name, access);
else
// Don't clobber it.
f = fopen(name, "a");
if ( ! f )
{
bro_strerror_r(errno, buf, sizeof(buf));
reporter->Error("can't open %s: %s", name, buf);
f = fopen("/dev/null", "w");
if ( f )
{
okay_to_manage = 0;
return f;
}
bro_strerror_r(errno, buf, sizeof(buf));
reporter->Error("can't open /dev/null: %s", buf);
return 0;
}
if ( fseek(f, position, SEEK_SET) < 0 )
{
bro_strerror_r(errno, buf, sizeof(buf));
reporter->Error("reopen seek failed: %s", buf);
}
InsertAtBeginning();
RaiseOpenEvent();
return f;
}
@ -285,126 +213,30 @@ int BroFile::Close()
if ( f == stdin || f == stdout || f == stderr )
return 0;
if ( is_in_cache )
{
Unlink();
if ( f )
{
fclose(f);
f = 0;
open_time = 0;
}
is_open = 0;
okay_to_manage = 0; // no longer managed since will never reopen
return 1;
}
// Not managed.
if ( ! f )
return 0;
fclose(f);
f = 0;
f = nullptr;
open_time = is_open = 0;
Unlink();
return 1;
}
void BroFile::Suspend()
{
if ( ! is_in_cache )
reporter->InternalError("BroFile::Suspend() called for non-cached file");
if ( ! is_open )
reporter->InternalError("BroFile::Suspend() called for non-open file");
Unlink();
if ( ! f )
reporter->InternalError("BroFile::Suspend() called for nil file");
if ( (position = ftell(f)) < 0 )
{
char buf[256];
bro_strerror_r(errno, buf, sizeof(buf));
reporter->Error("ftell failed: %s", buf);
position = 0;
}
fclose(f);
f = 0;
}
void BroFile::PurgeCache()
{
if ( tail )
{
tail->Suspend();
return;
}
reporter->InternalWarning("BroFile purge of empty cache");
}
void BroFile::Unlink()
{
if ( is_in_cache )
for ( auto it = open_files.begin(); it != open_files.end(); ++it)
{
if ( head == this )
head = Next();
else
Prev()->SetNext(next);
if ( tail == this )
tail = Prev();
else
Next()->SetPrev(prev);
if ( (head || tail) && ! (head && tail) )
reporter->InternalError("BroFile link list botch");
is_in_cache = 0;
prev = next = 0;
if ( --num_files_in_cache < 0 )
reporter->InternalError("BroFile underflow of file cache");
if ( (*it).second == this )
{
open_files.erase(it);
return;
}
}
}
void BroFile::InsertAtBeginning()
{
if ( ! head )
{
head = tail = this;
next = prev = 0;
}
else
{
SetNext(head);
SetPrev(0);
head->SetPrev(this);
head = this;
}
if ( ++num_files_in_cache > max_files_in_cache )
reporter->InternalError("BroFile overflow of file cache");
is_in_cache = 1;
}
void BroFile::MoveToBeginning()
{
if ( head == this )
return; // already at the beginning
if ( ! is_in_cache || ! prev )
reporter->InternalError("BroFile inconsistency in MoveToBeginning()");
Unlink();
InsertAtBeginning();
}
void BroFile::Describe(ODesc* d) const
{
d->AddSP("file");
@ -444,9 +276,6 @@ RecordVal* BroFile::Rotate()
if ( f == stdin || f == stdout || f == stderr )
return 0;
if ( okay_to_manage && ! is_in_cache )
BringIntoCache();
RecordVal* info = new RecordVal(rotate_info);
FILE* newf = rotate_file(name, info);
@ -459,6 +288,7 @@ RecordVal* BroFile::Rotate()
info->Assign(2, new Val(open_time, TYPE_TIME));
Unlink();
fclose(f);
f = 0;
@ -466,14 +296,13 @@ RecordVal* BroFile::Rotate()
return info;
}
void BroFile::CloseCachedFiles()
void BroFile::CloseOpenFiles()
{
BroFile* next;
for ( BroFile* f = head; f; f = next )
auto it = open_files.begin();
while ( it != open_files.end() )
{
next = f->next;
if ( f->is_in_cache )
f->Close();
auto el = it++;
(*el).second->Close();
}
}
@ -482,9 +311,6 @@ int BroFile::Write(const char* data, int len)
if ( ! is_open )
return 0;
if ( ! is_in_cache && okay_to_manage )
BringIntoCache();
if ( ! len )
len = strlen(data);
@ -519,10 +345,13 @@ double BroFile::Size()
BroFile* BroFile::GetFile(const char* name)
{
for ( BroFile* f = head; f; f = f->next )
for ( const auto &el : open_files )
{
if ( f->name && streq(name, f->name) )
return f;
if ( el.first == name )
{
Ref(el.second);
return el.second;
}
}
return new BroFile(name, "w", 0);

View file

@ -8,6 +8,9 @@
#include "Obj.h"
#include "Attr.h"
#include <list>
#include <utility>
# ifdef NEED_KRB5_H
# include <krb5.h>
# endif // NEED_KRB5_H
@ -54,8 +57,8 @@ public:
// Returns the current size of the file, after fresh stat'ing.
double Size();
// Close all files which are managed by us.
static void CloseCachedFiles();
// Close all files which are currently open.
static void CloseOpenFiles();
// Get the file with the given name, opening it if it doesn't yet exist.
static BroFile* GetFile(const char* name);
@ -75,24 +78,14 @@ protected:
* If file is not given and mode is, the filename will be opened with that
* access mode.
*/
bool Open(FILE* f = 0, const char* mode = 0);
bool Open(FILE* f = nullptr, const char* mode = 0);
BroFile* Prev() { return prev; }
BroFile* Next() { return next; }
void SetPrev(BroFile* f) { prev = f; }
void SetNext(BroFile* f) { next = f; }
void Suspend();
void PurgeCache();
void Unlink();
void InsertAtBeginning();
void MoveToBeginning();
// Returns nil if the file is not active, was in error, etc.
// (Protected because we do not want anyone to write directly
// to the file.)
FILE* File();
FILE* BringIntoCache();
// Raises a file_opened event.
void RaiseOpenEvent();
@ -101,12 +94,7 @@ protected:
BroType* t;
char* name;
char* access;
int is_in_cache; // whether it's currently in the open-file cache
int is_open; // whether the file is open in a general sense
int okay_to_manage; // we're allowed to cache/uncache
long position; // only valid if ! is_in_cache
BroFile* next; // doubly-linked list of cached files
BroFile* prev;
Attributes* attrs;
bool buffered;
double open_time;
@ -114,6 +102,9 @@ protected:
bool raw_output;
static const int MIN_BUFFER_SIZE = 1024;
private:
static std::list<std::pair<std::string, BroFile*>> open_files;
};
#endif

View file

@ -155,7 +155,6 @@ TableVal* preserve_orig_addr;
TableVal* preserve_resp_addr;
TableVal* preserve_other_addr;
int max_files_in_cache;
RecordType* rotate_info;
StringVal* log_rotate_base_time;
@ -229,7 +228,6 @@ void init_general_global_var()
table_expire_delay = opt_internal_double("table_expire_delay");
table_incremental_step = opt_internal_int("table_incremental_step");
max_files_in_cache = opt_internal_int("max_files_in_cache");
rotate_info = internal_type("rotate_info")->AsRecordType();
log_rotate_base_time = opt_internal_string("log_rotate_base_time");

View file

@ -158,7 +158,6 @@ extern TableVal* preserve_other_addr;
extern double connection_status_update_interval;
extern int max_files_in_cache;
extern RecordType* rotate_info;
extern StringVal* log_rotate_base_time;

View file

@ -100,10 +100,7 @@ struct val_converter {
auto file = BroFile::GetFile(a.data());
if ( file )
{
Ref(file);
return new Val(file);
}
return nullptr;
}

View file

@ -372,7 +372,7 @@ void termination_signal()
// Close files after net_delete(), because net_delete()
// might write to connection content files.
BroFile::CloseCachedFiles();
BroFile::CloseOpenFiles();
delete rule_matcher;
@ -1126,7 +1126,7 @@ int main(int argc, char** argv)
// Close files after net_delete(), because net_delete()
// might write to connection content files.
BroFile::CloseCachedFiles();
BroFile::CloseOpenFiles();
}
else
{

View file

@ -1,4 +0,0 @@
opened
write 0
write 3
write 6

View file

@ -1,4 +0,0 @@
opened
write 1
write 4
write 7

View file

@ -1,4 +0,0 @@
opened
write 2
write 5
write 8

View file

@ -1,6 +0,0 @@
opened
write 0
opened
write 3
opened
write 6

View file

@ -1,6 +0,0 @@
opened
write 1
opened
write 4
opened
write 7

View file

@ -1,6 +0,0 @@
opened
write 2
opened
write 5
opened
write 8

View file

@ -1,49 +0,0 @@
# This checks that the interactions between open-file caching and
# cloning works ok. In the first case, all files can fit
# in the cache, but get cloned before every write. In the
# second case, files are eventually forced out of the cache; later writing
# requires re-opening.
# @TEST-EXEC: zeek -b %INPUT "test_file_prefix=one"
# @TEST-EXEC: btest-diff one0
# @TEST-EXEC: btest-diff one1
# @TEST-EXEC: btest-diff one2
# @TEST-EXEC: zeek -b %INPUT "test_file_prefix=two" "max_files_in_cache=2"
# @TEST-EXEC: btest-diff two0
# @TEST-EXEC: btest-diff two1
# @TEST-EXEC: btest-diff two2
const test_file_prefix = "" &redef;
global file_table: table[string] of file;
global iterations: vector of count = vector(0,1,2,3,4,5,6,7,8);
function write_to_file(c: count)
{
local f: file;
# Take turns writing across three output files.
local filename = fmt("%s%s", test_file_prefix, c % 3 );
if ( filename in file_table )
f = file_table[filename];
else
{
f = open(filename);
file_table[filename] = f;
}
# This when block is a trick to get the frame cloned
# and thus serialize the local file value
when ( local s = fmt("write %d", c) )
print f, s;
}
event file_opened(f: file)
{
print f, "opened";
}
event zeek_init()
{
for ( i in iterations )
write_to_file(iterations[i]);
}