From 5052dc03fc937ba94a2c231d27cdd778930a6988 Mon Sep 17 00:00:00 2001 From: Johanna Amann Date: Wed, 26 Jun 2019 15:03:45 -0700 Subject: [PATCH] Remove the BroFile cache GH-375 --- scripts/base/init-bare.zeek | 5 - src/File.cc | 247 +++--------------- src/File.h | 27 +- src/NetVar.cc | 2 - src/NetVar.h | 1 - src/broker/Data.cc | 3 - src/main.cc | 4 +- .../Baseline/core.file-caching-cloning/one0 | 4 - .../Baseline/core.file-caching-cloning/one1 | 4 - .../Baseline/core.file-caching-cloning/one2 | 4 - .../Baseline/core.file-caching-cloning/two0 | 6 - .../Baseline/core.file-caching-cloning/two1 | 6 - .../Baseline/core.file-caching-cloning/two2 | 6 - testing/btest/core/file-caching-cloning.test | 49 ---- 14 files changed, 49 insertions(+), 319 deletions(-) delete mode 100644 testing/btest/Baseline/core.file-caching-cloning/one0 delete mode 100644 testing/btest/Baseline/core.file-caching-cloning/one1 delete mode 100644 testing/btest/Baseline/core.file-caching-cloning/one2 delete mode 100644 testing/btest/Baseline/core.file-caching-cloning/two0 delete mode 100644 testing/btest/Baseline/core.file-caching-cloning/two1 delete mode 100644 testing/btest/Baseline/core.file-caching-cloning/two2 delete mode 100644 testing/btest/core/file-caching-cloning.test diff --git a/scripts/base/init-bare.zeek b/scripts/base/init-bare.zeek index 8bc02f379d..1910dd82fb 100644 --- a/scripts/base/init-bare.zeek +++ b/scripts/base/init-bare.zeek @@ -4622,11 +4622,6 @@ module GLOBAL; ## BPF filter the user has set via the -f command line options. Empty if none. const cmd_line_bpf_filter = "" &redef; -## The maximum number of open files to keep cached at a given time. -## If set to zero, this is automatically determined by inspecting -## the current/maximum limit on open files for the process. -const max_files_in_cache = 0 &redef; - ## Deprecated. const log_rotate_base_time = "0:00" &redef; diff --git a/src/File.cc b/src/File.cc index 8ac229b9fc..d1f706514f 100644 --- a/src/File.cc +++ b/src/File.cc @@ -28,16 +28,10 @@ #include "Event.h" #include "Reporter.h" -// The following could in principle be part of a "file manager" object. +std::list> BroFile::open_files; -#define MAX_FILE_CACHE_SIZE 512 -static int num_files_in_cache = 0; -static BroFile* head = 0; -static BroFile* tail = 0; - -// Maximizes the number of open file descriptors and returns the number -// that we should use for the cache. -static int maximize_num_fds() +// Maximizes the number of open file descriptors. +static void maximize_num_fds() { struct rlimit rl; if ( getrlimit(RLIMIT_NOFILE, &rl) < 0 ) @@ -46,11 +40,7 @@ static int maximize_num_fds() if ( rl.rlim_max == RLIM_INFINITY ) { // Don't try raising the current limit. - if ( rl.rlim_cur == RLIM_INFINITY ) - // Let's not be too ambitious. - return MAX_FILE_CACHE_SIZE; - else - return rl.rlim_cur / 2; + return; } // See if we can raise the current to the maximum. @@ -58,11 +48,8 @@ static int maximize_num_fds() if ( setrlimit(RLIMIT_NOFILE, &rl) < 0 ) reporter->FatalError("maximize_num_fds(): setrlimit failed"); - - return rl.rlim_cur / 2; } - BroFile::BroFile(FILE* arg_f) { Init(); @@ -104,7 +91,6 @@ BroFile::BroFile(const char* arg_name, const char* arg_access, BroType* arg_t) { reporter->Error("cannot open %s: %s", name, strerror(errno)); is_open = 0; - okay_to_manage = 0; } } @@ -127,14 +113,15 @@ const char* BroFile::Name() const bool BroFile::Open(FILE* file, const char* mode) { + static bool fds_maximized = false; open_time = network_time ? network_time : current_time(); - if ( ! max_files_in_cache ) + if ( ! fds_maximized ) + { // Haven't initialized yet. - max_files_in_cache = maximize_num_fds(); - - if ( num_files_in_cache >= max_files_in_cache ) - PurgeCache(); + maximize_num_fds(); + fds_maximized = true; + } f = file; @@ -148,22 +135,15 @@ bool BroFile::Open(FILE* file, const char* mode) SetBuf(buffered); - if ( f ) + if ( ! f ) { - // These are the only files we manage, because we open them - // ourselves and hence don't have any surprises regarding - // whether we're allowed to close them. - is_open = okay_to_manage = 1; - - InsertAtBeginning(); - } - else - { - // No point managing it. - is_open = okay_to_manage = 0; + is_open = 0; return false; } + is_open = 1; + open_files.emplace_back(std::make_pair(name, this)); + RaiseOpenEvent(); return true; @@ -185,9 +165,7 @@ BroFile::~BroFile() void BroFile::Init() { - is_open = okay_to_manage = is_in_cache = 0; - position = 0; - next = prev = 0; + is_open = 0; attrs = 0; buffered = true; print_hook = true; @@ -201,56 +179,6 @@ void BroFile::Init() FILE* BroFile::File() { - if ( okay_to_manage && ! is_in_cache ) - f = BringIntoCache(); - - return f; - } - -FILE* BroFile::BringIntoCache() - { - char buf[256]; - - if ( f ) - reporter->InternalError("BroFile non-nil non-open file"); - - if ( num_files_in_cache >= max_files_in_cache ) - PurgeCache(); - - if ( position == 0 ) - // Need to truncate it. - f = fopen(name, access); - else - // Don't clobber it. - f = fopen(name, "a"); - - if ( ! f ) - { - bro_strerror_r(errno, buf, sizeof(buf)); - reporter->Error("can't open %s: %s", name, buf); - - f = fopen("/dev/null", "w"); - - if ( f ) - { - okay_to_manage = 0; - return f; - } - - bro_strerror_r(errno, buf, sizeof(buf)); - reporter->Error("can't open /dev/null: %s", buf); - return 0; - } - - if ( fseek(f, position, SEEK_SET) < 0 ) - { - bro_strerror_r(errno, buf, sizeof(buf)); - reporter->Error("reopen seek failed: %s", buf); - } - - InsertAtBeginning(); - RaiseOpenEvent(); - return f; } @@ -285,126 +213,30 @@ int BroFile::Close() if ( f == stdin || f == stdout || f == stderr ) return 0; - if ( is_in_cache ) - { - Unlink(); - if ( f ) - { - fclose(f); - f = 0; - open_time = 0; - } - - is_open = 0; - okay_to_manage = 0; // no longer managed since will never reopen - - return 1; - } - - // Not managed. if ( ! f ) return 0; fclose(f); - f = 0; + f = nullptr; + open_time = is_open = 0; + + Unlink(); return 1; } -void BroFile::Suspend() - { - if ( ! is_in_cache ) - reporter->InternalError("BroFile::Suspend() called for non-cached file"); - - if ( ! is_open ) - reporter->InternalError("BroFile::Suspend() called for non-open file"); - - Unlink(); - - if ( ! f ) - reporter->InternalError("BroFile::Suspend() called for nil file"); - - if ( (position = ftell(f)) < 0 ) - { - char buf[256]; - bro_strerror_r(errno, buf, sizeof(buf)); - reporter->Error("ftell failed: %s", buf); - position = 0; - } - - fclose(f); - f = 0; - } - -void BroFile::PurgeCache() - { - if ( tail ) - { - tail->Suspend(); - return; - } - - reporter->InternalWarning("BroFile purge of empty cache"); - } - void BroFile::Unlink() { - if ( is_in_cache ) + for ( auto it = open_files.begin(); it != open_files.end(); ++it) { - if ( head == this ) - head = Next(); - else - Prev()->SetNext(next); - - if ( tail == this ) - tail = Prev(); - else - Next()->SetPrev(prev); - - if ( (head || tail) && ! (head && tail) ) - reporter->InternalError("BroFile link list botch"); - - is_in_cache = 0; - prev = next = 0; - - if ( --num_files_in_cache < 0 ) - reporter->InternalError("BroFile underflow of file cache"); + if ( (*it).second == this ) + { + open_files.erase(it); + return; + } } } -void BroFile::InsertAtBeginning() - { - if ( ! head ) - { - head = tail = this; - next = prev = 0; - } - else - { - SetNext(head); - SetPrev(0); - head->SetPrev(this); - head = this; - } - - if ( ++num_files_in_cache > max_files_in_cache ) - reporter->InternalError("BroFile overflow of file cache"); - - is_in_cache = 1; - } - -void BroFile::MoveToBeginning() - { - if ( head == this ) - return; // already at the beginning - - if ( ! is_in_cache || ! prev ) - reporter->InternalError("BroFile inconsistency in MoveToBeginning()"); - - Unlink(); - InsertAtBeginning(); - } - void BroFile::Describe(ODesc* d) const { d->AddSP("file"); @@ -444,9 +276,6 @@ RecordVal* BroFile::Rotate() if ( f == stdin || f == stdout || f == stderr ) return 0; - if ( okay_to_manage && ! is_in_cache ) - BringIntoCache(); - RecordVal* info = new RecordVal(rotate_info); FILE* newf = rotate_file(name, info); @@ -459,6 +288,7 @@ RecordVal* BroFile::Rotate() info->Assign(2, new Val(open_time, TYPE_TIME)); Unlink(); + fclose(f); f = 0; @@ -466,14 +296,13 @@ RecordVal* BroFile::Rotate() return info; } -void BroFile::CloseCachedFiles() +void BroFile::CloseOpenFiles() { - BroFile* next; - for ( BroFile* f = head; f; f = next ) + auto it = open_files.begin(); + while ( it != open_files.end() ) { - next = f->next; - if ( f->is_in_cache ) - f->Close(); + auto el = it++; + (*el).second->Close(); } } @@ -482,9 +311,6 @@ int BroFile::Write(const char* data, int len) if ( ! is_open ) return 0; - if ( ! is_in_cache && okay_to_manage ) - BringIntoCache(); - if ( ! len ) len = strlen(data); @@ -519,10 +345,13 @@ double BroFile::Size() BroFile* BroFile::GetFile(const char* name) { - for ( BroFile* f = head; f; f = f->next ) + for ( const auto &el : open_files ) { - if ( f->name && streq(name, f->name) ) - return f; + if ( el.first == name ) + { + Ref(el.second); + return el.second; + } } return new BroFile(name, "w", 0); diff --git a/src/File.h b/src/File.h index 48689b4617..07512d4465 100644 --- a/src/File.h +++ b/src/File.h @@ -8,6 +8,9 @@ #include "Obj.h" #include "Attr.h" +#include +#include + # ifdef NEED_KRB5_H # include # endif // NEED_KRB5_H @@ -54,8 +57,8 @@ public: // Returns the current size of the file, after fresh stat'ing. double Size(); - // Close all files which are managed by us. - static void CloseCachedFiles(); + // Close all files which are currently open. + static void CloseOpenFiles(); // Get the file with the given name, opening it if it doesn't yet exist. static BroFile* GetFile(const char* name); @@ -75,24 +78,14 @@ protected: * If file is not given and mode is, the filename will be opened with that * access mode. */ - bool Open(FILE* f = 0, const char* mode = 0); + bool Open(FILE* f = nullptr, const char* mode = 0); - BroFile* Prev() { return prev; } - BroFile* Next() { return next; } - void SetPrev(BroFile* f) { prev = f; } - void SetNext(BroFile* f) { next = f; } - - void Suspend(); - void PurgeCache(); void Unlink(); - void InsertAtBeginning(); - void MoveToBeginning(); // Returns nil if the file is not active, was in error, etc. // (Protected because we do not want anyone to write directly // to the file.) FILE* File(); - FILE* BringIntoCache(); // Raises a file_opened event. void RaiseOpenEvent(); @@ -101,12 +94,7 @@ protected: BroType* t; char* name; char* access; - int is_in_cache; // whether it's currently in the open-file cache int is_open; // whether the file is open in a general sense - int okay_to_manage; // we're allowed to cache/uncache - long position; // only valid if ! is_in_cache - BroFile* next; // doubly-linked list of cached files - BroFile* prev; Attributes* attrs; bool buffered; double open_time; @@ -114,6 +102,9 @@ protected: bool raw_output; static const int MIN_BUFFER_SIZE = 1024; + +private: + static std::list> open_files; }; #endif diff --git a/src/NetVar.cc b/src/NetVar.cc index f7288847e7..6c59f2a5fa 100644 --- a/src/NetVar.cc +++ b/src/NetVar.cc @@ -155,7 +155,6 @@ TableVal* preserve_orig_addr; TableVal* preserve_resp_addr; TableVal* preserve_other_addr; -int max_files_in_cache; RecordType* rotate_info; StringVal* log_rotate_base_time; @@ -229,7 +228,6 @@ void init_general_global_var() table_expire_delay = opt_internal_double("table_expire_delay"); table_incremental_step = opt_internal_int("table_incremental_step"); - max_files_in_cache = opt_internal_int("max_files_in_cache"); rotate_info = internal_type("rotate_info")->AsRecordType(); log_rotate_base_time = opt_internal_string("log_rotate_base_time"); diff --git a/src/NetVar.h b/src/NetVar.h index 583589e5ff..3c9d92eb4d 100644 --- a/src/NetVar.h +++ b/src/NetVar.h @@ -158,7 +158,6 @@ extern TableVal* preserve_other_addr; extern double connection_status_update_interval; -extern int max_files_in_cache; extern RecordType* rotate_info; extern StringVal* log_rotate_base_time; diff --git a/src/broker/Data.cc b/src/broker/Data.cc index d2e53fe45b..657ddd3551 100644 --- a/src/broker/Data.cc +++ b/src/broker/Data.cc @@ -100,10 +100,7 @@ struct val_converter { auto file = BroFile::GetFile(a.data()); if ( file ) - { - Ref(file); return new Val(file); - } return nullptr; } diff --git a/src/main.cc b/src/main.cc index 3556a0e99d..456920fb1b 100644 --- a/src/main.cc +++ b/src/main.cc @@ -372,7 +372,7 @@ void termination_signal() // Close files after net_delete(), because net_delete() // might write to connection content files. - BroFile::CloseCachedFiles(); + BroFile::CloseOpenFiles(); delete rule_matcher; @@ -1126,7 +1126,7 @@ int main(int argc, char** argv) // Close files after net_delete(), because net_delete() // might write to connection content files. - BroFile::CloseCachedFiles(); + BroFile::CloseOpenFiles(); } else { diff --git a/testing/btest/Baseline/core.file-caching-cloning/one0 b/testing/btest/Baseline/core.file-caching-cloning/one0 deleted file mode 100644 index abfe9a2af6..0000000000 --- a/testing/btest/Baseline/core.file-caching-cloning/one0 +++ /dev/null @@ -1,4 +0,0 @@ -opened -write 0 -write 3 -write 6 diff --git a/testing/btest/Baseline/core.file-caching-cloning/one1 b/testing/btest/Baseline/core.file-caching-cloning/one1 deleted file mode 100644 index d53edaed28..0000000000 --- a/testing/btest/Baseline/core.file-caching-cloning/one1 +++ /dev/null @@ -1,4 +0,0 @@ -opened -write 1 -write 4 -write 7 diff --git a/testing/btest/Baseline/core.file-caching-cloning/one2 b/testing/btest/Baseline/core.file-caching-cloning/one2 deleted file mode 100644 index 5b5c9bc130..0000000000 --- a/testing/btest/Baseline/core.file-caching-cloning/one2 +++ /dev/null @@ -1,4 +0,0 @@ -opened -write 2 -write 5 -write 8 diff --git a/testing/btest/Baseline/core.file-caching-cloning/two0 b/testing/btest/Baseline/core.file-caching-cloning/two0 deleted file mode 100644 index 88e273032e..0000000000 --- a/testing/btest/Baseline/core.file-caching-cloning/two0 +++ /dev/null @@ -1,6 +0,0 @@ -opened -write 0 -opened -write 3 -opened -write 6 diff --git a/testing/btest/Baseline/core.file-caching-cloning/two1 b/testing/btest/Baseline/core.file-caching-cloning/two1 deleted file mode 100644 index b2f9350bc4..0000000000 --- a/testing/btest/Baseline/core.file-caching-cloning/two1 +++ /dev/null @@ -1,6 +0,0 @@ -opened -write 1 -opened -write 4 -opened -write 7 diff --git a/testing/btest/Baseline/core.file-caching-cloning/two2 b/testing/btest/Baseline/core.file-caching-cloning/two2 deleted file mode 100644 index 94a971c7db..0000000000 --- a/testing/btest/Baseline/core.file-caching-cloning/two2 +++ /dev/null @@ -1,6 +0,0 @@ -opened -write 2 -opened -write 5 -opened -write 8 diff --git a/testing/btest/core/file-caching-cloning.test b/testing/btest/core/file-caching-cloning.test deleted file mode 100644 index 03d0f5021e..0000000000 --- a/testing/btest/core/file-caching-cloning.test +++ /dev/null @@ -1,49 +0,0 @@ -# This checks that the interactions between open-file caching and -# cloning works ok. In the first case, all files can fit -# in the cache, but get cloned before every write. In the -# second case, files are eventually forced out of the cache; later writing -# requires re-opening. - -# @TEST-EXEC: zeek -b %INPUT "test_file_prefix=one" -# @TEST-EXEC: btest-diff one0 -# @TEST-EXEC: btest-diff one1 -# @TEST-EXEC: btest-diff one2 -# @TEST-EXEC: zeek -b %INPUT "test_file_prefix=two" "max_files_in_cache=2" -# @TEST-EXEC: btest-diff two0 -# @TEST-EXEC: btest-diff two1 -# @TEST-EXEC: btest-diff two2 - -const test_file_prefix = "" &redef; -global file_table: table[string] of file; -global iterations: vector of count = vector(0,1,2,3,4,5,6,7,8); - -function write_to_file(c: count) - { - local f: file; - # Take turns writing across three output files. - local filename = fmt("%s%s", test_file_prefix, c % 3 ); - - if ( filename in file_table ) - f = file_table[filename]; - else - { - f = open(filename); - file_table[filename] = f; - } - - # This when block is a trick to get the frame cloned - # and thus serialize the local file value - when ( local s = fmt("write %d", c) ) - print f, s; - } - -event file_opened(f: file) - { - print f, "opened"; - } - -event zeek_init() - { - for ( i in iterations ) - write_to_file(iterations[i]); - }