From 4959d438fa0ba55338c871ab468f6181988dcfd5 Mon Sep 17 00:00:00 2001 From: Jon Siwek Date: Fri, 27 Sep 2019 18:53:07 -0700 Subject: [PATCH 01/76] Initial structure for supervisor-mode The full process hierarchy isn't set up yet, but these changes help prepare by doing two things: - Add a -j option to enable supervisor-mode. Currently, just a single "stem" process gets forked early on to be used as the basis for further forking into real cluster nodes. - Separates the parsing of command-line options from their consumption. i.e. need to parse whether we're in -j supervisor-mode before modifying any global state since that would taint the "stem" process. The new intermediate structure containing the parsed options may also serve as a way to pass configuration info from "stem" to its descendent cluster node processes. --- src/CMakeLists.txt | 1 + src/DebugLogger.cc | 3 +- src/DebugLogger.h | 1 + src/Flare.cc | 17 +- src/Flare.h | 4 +- src/Net.cc | 26 +- src/Net.h | 10 +- src/Pipe.cc | 6 +- src/RuleMatcher.cc | 10 +- src/RuleMatcher.h | 2 +- src/Supervisor.cc | 135 ++++++ src/Supervisor.h | 54 +++ src/input.h | 4 +- src/main.cc | 772 +++++++++++++++++++++-------------- src/scan.l | 11 +- src/threading/BasicThread.cc | 24 +- src/util.cc | 17 +- src/util.h | 20 + 18 files changed, 751 insertions(+), 366 deletions(-) create mode 100644 src/Supervisor.cc create mode 100644 src/Supervisor.h diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index f2009c536f..2655b7f531 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -286,6 +286,7 @@ set(MAIN_SRCS Notifier.cc Stats.cc Stmt.cc + Supervisor.cc Tag.cc Timer.cc Traverse.cc diff --git a/src/DebugLogger.cc b/src/DebugLogger.cc index 6af7e26e38..fdb57efc61 100644 --- a/src/DebugLogger.cc +++ b/src/DebugLogger.cc @@ -19,7 +19,8 @@ DebugLogger::Stream DebugLogger::streams[NUM_DBGS] = { { "threading", 0, false }, { "file_analysis", 0, false }, { "plugins", 0, false }, { "zeekygen", 0, false }, { "pktio", 0, false }, { 
"broker", 0, false }, - { "scripts", 0, false} + { "scripts", 0, false}, + { "supervisor", 0, false} }; DebugLogger::DebugLogger() diff --git a/src/DebugLogger.h b/src/DebugLogger.h index 4efb6a0421..e0eb57f47b 100644 --- a/src/DebugLogger.h +++ b/src/DebugLogger.h @@ -29,6 +29,7 @@ enum DebugStream { DBG_PKTIO, // Packet sources and dumpers. DBG_BROKER, // Broker communication DBG_SCRIPTS, // Script initialization + DBG_SUPERVISOR, // Process supervisor NUM_DBGS // Has to be last }; diff --git a/src/Flare.cc b/src/Flare.cc index 87dc946955..4229ffb63c 100644 --- a/src/Flare.cc +++ b/src/Flare.cc @@ -17,7 +17,14 @@ static void bad_pipe_op(const char* which) { char buf[256]; bro_strerror_r(errno, buf, sizeof(buf)); - reporter->FatalErrorWithCore("unexpected pipe %s failure: %s", which, buf); + + if ( reporter ) + reporter->FatalErrorWithCore("unexpected pipe %s failure: %s", which, buf); + else + { + fprintf(stderr, "unexpected pipe %s failure: %s", which, buf); + abort(); + } } void Flare::Fire() @@ -49,8 +56,9 @@ void Flare::Fire() } } -void Flare::Extinguish() +int Flare::Extinguish() { + int rval = 0; char tmp[256]; for ( ; ; ) @@ -58,8 +66,11 @@ void Flare::Extinguish() int n = read(pipe.ReadFD(), &tmp, sizeof(tmp)); if ( n >= 0 ) + { + rval += n; // Pipe may not be empty yet: try again. continue; + } if ( errno == EAGAIN ) // Success: pipe is now empty. @@ -71,4 +82,6 @@ void Flare::Extinguish() bad_pipe_op("read"); } + + return rval; } diff --git a/src/Flare.h b/src/Flare.h index cc340494aa..ebe902c172 100644 --- a/src/Flare.h +++ b/src/Flare.h @@ -31,8 +31,10 @@ public: /** * Take the object out of the "ready" state. + * @return number of bytes read from the pipe, corresponds to the number + * of times Fire() was called. 
*/ - void Extinguish(); + int Extinguish(); private: Pipe pipe; diff --git a/src/Net.cc b/src/Net.cc index af9806412f..f5b15be9ba 100644 --- a/src/Net.cc +++ b/src/Net.cc @@ -145,40 +145,39 @@ void net_update_time(double new_network_time) PLUGIN_HOOK_VOID(HOOK_UPDATE_NETWORK_TIME, HookUpdateNetworkTime(new_network_time)); } -void net_init(name_list& interfaces, name_list& readfiles, - const char* writefile, int do_watchdog) +void net_init(const std::vector& interfaces, + const std::vector& pcap_input_files, + const std::string& pcap_output_file, bool do_watchdog) { - if ( readfiles.length() > 0 ) + if ( ! pcap_input_files.empty() ) { reading_live = pseudo_realtime > 0.0; reading_traces = 1; - for ( int i = 0; i < readfiles.length(); ++i ) + for ( const auto& pif : pcap_input_files ) { - iosource::PktSrc* ps = iosource_mgr->OpenPktSrc(readfiles[i], false); + iosource::PktSrc* ps = iosource_mgr->OpenPktSrc(pif, false); assert(ps); if ( ! ps->IsOpen() ) reporter->FatalError("problem with trace file %s (%s)", - readfiles[i], - ps->ErrorMsg()); + pif.data(), ps->ErrorMsg()); } } - else if ( interfaces.length() > 0 ) + else if ( ! interfaces.empty() ) { reading_live = 1; reading_traces = 0; - for ( int i = 0; i < interfaces.length(); ++i ) + for ( const auto& iface : interfaces ) { - iosource::PktSrc* ps = iosource_mgr->OpenPktSrc(interfaces[i], true); + iosource::PktSrc* ps = iosource_mgr->OpenPktSrc(iface, true); assert(ps); if ( ! ps->IsOpen() ) reporter->FatalError("problem with interface %s (%s)", - interfaces[i], - ps->ErrorMsg()); + iface.data(), ps->ErrorMsg()); } } @@ -189,8 +188,9 @@ void net_init(name_list& interfaces, name_list& readfiles, // a timer. reading_traces = reading_live = 0; - if ( writefile ) + if ( ! 
pcap_output_file.empty() ) { + const char* writefile = pcap_output_file.data(); pkt_dumper = iosource_mgr->OpenPktDumper(writefile, false); assert(pkt_dumper); diff --git a/src/Net.h b/src/Net.h index d14f1346ef..c841a1fde4 100644 --- a/src/Net.h +++ b/src/Net.h @@ -2,6 +2,9 @@ #pragma once +#include +#include + #include "net_util.h" #include "util.h" #include "List.h" @@ -10,8 +13,9 @@ #include "iosource/PktSrc.h" #include "iosource/PktDumper.h" -extern void net_init(name_list& interfaces, name_list& readfiles, - const char* writefile, int do_watchdog); +extern void net_init(const std::vector& interfaces, + const std::vector& pcap_input_files, + const std::string& pcap_output_file, bool do_watchdog); extern void net_run(); extern void net_get_final_stats(); extern void net_finish(int drain_events); @@ -76,8 +80,6 @@ extern iosource::IOSource* current_iosrc; extern iosource::PktDumper* pkt_dumper; // where to save packets -extern char* writefile; - // Script file we have already scanned (or are in the process of scanning). // They are identified by inode number. 
struct ScannedFile { diff --git a/src/Pipe.cc b/src/Pipe.cc index 0fa0eefdd7..7276571402 100644 --- a/src/Pipe.cc +++ b/src/Pipe.cc @@ -13,7 +13,11 @@ static void pipe_fail(int eno) { char tmp[256]; bro_strerror_r(eno, tmp, sizeof(tmp)); - reporter->FatalError("Pipe failure: %s", tmp); + + if ( reporter ) + reporter->FatalError("Pipe failure: %s", tmp); + else + fprintf(stderr, "Pipe failure: %s", tmp); } static void set_flags(int fd, int flags) diff --git a/src/RuleMatcher.cc b/src/RuleMatcher.cc index 7200ae2ad7..747b4ce16e 100644 --- a/src/RuleMatcher.cc +++ b/src/RuleMatcher.cc @@ -231,7 +231,7 @@ void RuleMatcher::Delete(RuleHdrTest* node) delete node; } -bool RuleMatcher::ReadFiles(const name_list& files) +bool RuleMatcher::ReadFiles(const std::vector& files) { #ifdef USE_PERFTOOLS_DEBUG HeapLeakChecker::Disabler disabler; @@ -239,18 +239,18 @@ bool RuleMatcher::ReadFiles(const name_list& files) parse_error = false; - for ( int i = 0; i < files.length(); ++i ) + for ( const auto& f : files ) { - rules_in = open_file(find_file(files[i], bro_path(), ".sig")); + rules_in = open_file(find_file(f, bro_path(), ".sig")); if ( ! rules_in ) { - reporter->Error("Can't open signature file %s", files[i]); + reporter->Error("Can't open signature file %s", f.data()); return false; } rules_line_number = 0; - current_rule_file = files[i]; + current_rule_file = f.data(); rules_parse(); fclose(rules_in); } diff --git a/src/RuleMatcher.h b/src/RuleMatcher.h index 21c65d2700..5804fe63d8 100644 --- a/src/RuleMatcher.h +++ b/src/RuleMatcher.h @@ -221,7 +221,7 @@ public: ~RuleMatcher(); // Parse the given files and built up data structures. - bool ReadFiles(const name_list& files); + bool ReadFiles(const std::vector& files); /** * Inititialize a state object for matching file magic signatures. 
diff --git a/src/Supervisor.cc b/src/Supervisor.cc new file mode 100644 index 0000000000..2d9cc64e18 --- /dev/null +++ b/src/Supervisor.cc @@ -0,0 +1,135 @@ + +#include +#include + +#include "Supervisor.h" +#include "Reporter.h" +#include "DebugLogger.h" +#include "zeek-config.h" +#include "util.h" + +extern "C" { +#include "setsignal.h" +} + +static RETSIGTYPE supervisor_sig_handler(int signo) + { + DBG_LOG(DBG_SUPERVISOR, "received SIGCHLD signal: %d", signo); + zeek::supervisor->ObserveChildSignal(); + return RETSIGVAL; + } + +zeek::Supervisor::Supervisor(zeek::Supervisor::Config cfg, + std::unique_ptr pipe, + pid_t arg_stem_pid) + : config(std::move(cfg)), stem_pid(arg_stem_pid), stem_pipe(std::move(pipe)) + { + DBG_LOG(DBG_SUPERVISOR, "forked stem process %d", stem_pid); + DBG_LOG(DBG_SUPERVISOR, "using %d workers", config.num_workers); + setsignal(SIGCHLD, supervisor_sig_handler); + SetIdle(true); + } + +void zeek::Supervisor::ObserveChildSignal() + { + signal_flare.Fire(); + } + +zeek::Supervisor::~Supervisor() + { + if ( ! stem_pid ) + { + DBG_LOG(DBG_SUPERVISOR, "shutdown, stem process already exited"); + return; + } + + DBG_LOG(DBG_SUPERVISOR, "shutdown, killing stem process %d", stem_pid); + + // TODO: is a signal the best way to trigger shutdown of descendant processes?
+ auto kill_res = kill(stem_pid, SIGTERM); + + if ( kill_res == -1 ) + { + char tmp[256]; + bro_strerror_r(errno, tmp, sizeof(tmp)); + reporter->Error("Failed to send SIGTERM to stem process: %s", tmp); + } + else + { + int status; + auto wait_res = waitpid(stem_pid, &status, 0); + + if ( wait_res == -1 ) + { + char tmp[256]; + bro_strerror_r(errno, tmp, sizeof(tmp)); + reporter->Error("Failed to wait for stem process to exit: %s", tmp); + } + } + } + +void zeek::Supervisor::GetFds(iosource::FD_Set* read, iosource::FD_Set* write, + iosource::FD_Set* except) + { + read->Insert(signal_flare.FD()); + read->Insert(stem_pipe->ReadFD()); + } + +double zeek::Supervisor::NextTimestamp(double* local_network_time) + { + // We're only asked for a timestamp if either (1) a FD was ready + // or (2) we're not idle (and we go idle when Process is a no-op), + // so there's no case where returning -1 to signify a skip will help. + return timer_mgr->Time(); + } + +void zeek::Supervisor::Process() + { + auto child_signals = signal_flare.Extinguish(); + + DBG_LOG(DBG_SUPERVISOR, "process: child_signals %d, stem_pid %d", + child_signals, stem_pid); + + if ( child_signals && stem_pid ) + { + DBG_LOG(DBG_SUPERVISOR, "handle child signal, wait for %d", stem_pid); + int status; + auto res = waitpid(stem_pid, &status, WNOHANG); + + if ( res == 0 ) + { + DBG_LOG(DBG_SUPERVISOR, "false alarm, stem process still lives"); + } + else if ( res == -1 ) + { + char tmp[256]; + bro_strerror_r(errno, tmp, sizeof(tmp)); + reporter->Error("Supervisor failed to get exit status" + " of stem process: %s", tmp); + } + else if ( WIFEXITED(status) ) + { + DBG_LOG(DBG_SUPERVISOR, "stem process exited with status %d", + WEXITSTATUS(status)); + stem_pid = 0; + } + else if ( WIFSIGNALED(status) ) + { + DBG_LOG(DBG_SUPERVISOR, "stem process terminated by signal %d", + WTERMSIG(status)); + stem_pid = 0; + } + else + { + reporter->Error("Supervisor failed to get exit status" + " of stem process for unknown
reason"); + } + + // TODO: add proper handling of stem process exiting + // In what cases is it ok for the stem process to terminate and + // in what cases do we need to automatically re-create it? + // And how do we re-create it? It would be too late to fork() again + // because we've potentially already changed so much global state by the + // time we get there, so guess we exec() and start over completely? + } + } diff --git a/src/Supervisor.h b/src/Supervisor.h new file mode 100644 index 0000000000..aa647c5aed --- /dev/null +++ b/src/Supervisor.h @@ -0,0 +1,54 @@ +#pragma once + +#include +#include +#include +#include +#include +#include + +#include "iosource/IOSource.h" +#include "Pipe.h" +#include "Flare.h" + +namespace zeek { + +class Supervisor : public iosource::IOSource { +public: + + struct Config { + int num_workers = 1; + std::vector pcaps; + }; + + Supervisor(Config cfg, std::unique_ptr stem_pipe, pid_t stem_pid); + + ~Supervisor(); + + pid_t StemPID() const + { return stem_pid; } + + void ObserveChildSignal(); + +private: + + // IOSource interface overrides: + void GetFds(iosource::FD_Set* read, iosource::FD_Set* write, + iosource::FD_Set* except) override; + + double NextTimestamp(double* local_network_time) override; + + void Process() override; + + const char* Tag() override + { return "zeek::Supervisor"; } + + Config config; + pid_t stem_pid; + std::unique_ptr stem_pipe; + bro::Flare signal_flare; +}; + +extern Supervisor* supervisor; + +} // namespace zeek diff --git a/src/input.h b/src/input.h index be898d154b..2fa815ccb0 100644 --- a/src/input.h +++ b/src/input.h @@ -38,8 +38,8 @@ extern int bro_argc; extern char** bro_argv; extern const char* prog; -extern name_list prefixes; // -p flag -extern char* command_line_policy; // -e flag +extern std::vector zeek_script_prefixes; // -p flag +extern const char* command_line_policy; // -e flag extern vector params; class Stmt; diff --git a/src/main.cc b/src/main.cc index 1bfaa5882d..a76355f3a3
100644 --- a/src/main.cc +++ b/src/main.cc @@ -7,6 +7,7 @@ #include #include #include +#include #include #ifdef HAVE_GETOPT_H #include @@ -43,6 +44,7 @@ extern "C" { #include "Brofiler.h" #include "Traverse.h" +#include "Supervisor.h" #include "threading/Manager.h" #include "input/Manager.h" #include "logging/Manager.h" @@ -91,10 +93,9 @@ file_analysis::Manager* file_mgr = 0; zeekygen::Manager* zeekygen_mgr = 0; iosource::Manager* iosource_mgr = 0; bro_broker::Manager* broker_mgr = 0; +zeek::Supervisor* zeek::supervisor = 0; -const char* prog; -char* writefile = 0; -name_list prefixes; +std::vector zeek_script_prefixes; Stmt* stmts; EventHandlerPtr net_done = 0; RuleMatcher* rule_matcher = 0; @@ -104,10 +105,10 @@ ProfileLogger* segment_logger = 0; SampleLogger* sample_logger = 0; int signal_val = 0; extern char version[]; -char* command_line_policy = 0; +const char* command_line_policy = 0; vector params; set requested_plugins; -char* proc_status_file = 0; +const char* proc_status_file = 0; OpaqueType* md5_type = 0; OpaqueType* sha1_type = 0; @@ -142,24 +143,24 @@ const char* zeek_version() #endif } -bool bro_dns_fake() +static bool zeek_dns_fake() { return zeekenv("ZEEK_DNS_FAKE"); } -void usage(int code = 1) +static void usage(const char* prog, int code = 1) { fprintf(stderr, "zeek version %s\n", zeek_version()); fprintf(stderr, "usage: %s [options] [file ...]\n", prog); - fprintf(stderr, " | policy file, or read stdin\n"); + fprintf(stderr, " | Zeek script file, or read stdin\n"); fprintf(stderr, " -a|--parse-only | exit immediately after parsing scripts\n"); fprintf(stderr, " -b|--bare-mode | don't load scripts from the base/ directory\n"); - fprintf(stderr, " -d|--debug-policy | activate policy file debugging\n"); - fprintf(stderr, " -e|--exec | augment loaded policies by given code\n"); + fprintf(stderr, " -d|--debug-script | activate Zeek script debugging\n"); + fprintf(stderr, " -e|--exec | augment loaded scripts by given code\n"); fprintf(stderr, " 
-f|--filter | tcpdump filter\n"); fprintf(stderr, " -h|--help | command line help\n"); fprintf(stderr, " -i|--iface | read from given interface\n"); - fprintf(stderr, " -p|--prefix | add given prefix to policy file resolution\n"); + fprintf(stderr, " -p|--prefix | add given prefix to Zeek script file resolution\n"); fprintf(stderr, " -r|--readfile | read from given tcpdump file\n"); fprintf(stderr, " -s|--rulefile | read rules from given file\n"); fprintf(stderr, " -t|--tracefile | activate execution tracing\n"); @@ -187,6 +188,7 @@ void usage(int code = 1) fprintf(stderr, " -M|--mem-profile | record heap [perftools]\n"); #endif fprintf(stderr, " --pseudo-realtime[=] | enable pseudo-realtime for performance evaluation (default 1)\n"); + fprintf(stderr, " -j|--jobs[=] | enable supervisor mode with N workers (default 1)\n"); #ifdef USE_IDMEF fprintf(stderr, " -n|--idmef-dtd | specify path to IDMEF DTD file\n"); @@ -196,18 +198,279 @@ void usage(int code = 1) fprintf(stderr, " $ZEEK_PLUGIN_PATH | plugin search path (%s)\n", bro_plugin_path()); fprintf(stderr, " $ZEEK_PLUGIN_ACTIVATE | plugins to always activate (%s)\n", bro_plugin_activate()); fprintf(stderr, " $ZEEK_PREFIXES | prefix list (%s)\n", bro_prefixes().c_str()); - fprintf(stderr, " $ZEEK_DNS_FAKE | disable DNS lookups (%s)\n", bro_dns_fake() ? "on" : "off"); + fprintf(stderr, " $ZEEK_DNS_FAKE | disable DNS lookups (%s)\n", zeek_dns_fake() ? "on" : "off"); fprintf(stderr, " $ZEEK_SEED_FILE | file to load seeds from (not set)\n"); fprintf(stderr, " $ZEEK_LOG_SUFFIX | ASCII log file extension (.%s)\n", logging::writer::Ascii::LogExt().c_str()); fprintf(stderr, " $ZEEK_PROFILER_FILE | Output file for script execution statistics (not set)\n"); fprintf(stderr, " $ZEEK_DISABLE_ZEEKYGEN | Disable Zeekygen documentation support (%s)\n", zeekenv("ZEEK_DISABLE_ZEEKYGEN") ? "set" : "not set"); fprintf(stderr, " $ZEEK_DNS_RESOLVER | IPv4/IPv6 address of DNS resolver to use (%s)\n", zeekenv("ZEEK_DNS_RESOLVER") ? 
zeekenv("ZEEK_DNS_RESOLVER") : "not set, will use first IPv4 address from /etc/resolv.conf"); + fprintf(stderr, " $ZEEK_DEBUG_LOG_STDERR | Use stderr for debug logs generated via the -B flag"); fprintf(stderr, "\n"); exit(code); } +struct zeek_options { // Plain record of the parsed command-line settings; filled in by parse_cmdline(). + bool print_version = false; + bool print_usage = false; + bool print_execution_time = false; + bool print_signature_debug_info = false; + int print_plugins = 0; + + std::string debug_log_streams; + std::string debug_script_tracing_file; + + std::string identifier_to_print; + std::string script_code_to_exec; + std::vector<std::string> script_prefixes = { "" }; // "" = "no prefix" + + int supervised_workers = 0; + int signature_re_level = 4; + bool ignore_checksums = false; + bool use_watchdog = false; + double pseudo_realtime = 0; + DNS_MgrMode dns_mode = DNS_DEFAULT; + + bool parse_only = false; + bool bare_mode = false; + bool debug_scripts = false; + bool perftools_check_leaks = false; + bool perftools_profile = false; + + std::string pcap_filter; + std::vector<std::string> interfaces; + std::vector<std::string> pcap_files; + std::vector<std::string> signature_files; + + std::string pcap_output_file; + std::string random_seed_input_file; + std::string random_seed_output_file; + std::string process_status_file; + std::string zeekygen_config_file; + std::string libidmef_dtd_file = "idmef-message.dtd"; + + std::set<std::string> plugins_to_load; + std::vector<std::string> scripts_to_load; + std::vector<std::string> script_options_to_set; +}; + +static zeek_options parse_cmdline(int argc, char** argv) // Parses argv via getopt_long() into a zeek_options value; unknown options or combining -F with -P call usage(), mixing -i with -r exits with status 1. + { + zeek_options rval = {}; + + constexpr struct option long_opts[] = { + {"parse-only", no_argument, 0, 'a'}, + {"bare-mode", no_argument, 0, 'b'}, + {"debug-script", no_argument, 0, 'd'}, + {"exec", required_argument, 0, 'e'}, + {"filter", required_argument, 0, 'f'}, + {"help", no_argument, 0, 'h'}, + {"iface", required_argument, 0, 'i'}, + {"zeekygen", required_argument, 0, 'X'}, + {"prefix", required_argument, 0, 'p'}, + {"readfile", required_argument, 0, 'r'}, + {"rulefile", required_argument, 0, 's'},
+ {"tracefile", required_argument, 0, 't'}, + {"writefile", required_argument, 0, 'w'}, + {"version", no_argument, 0, 'v'}, + {"no-checksums", no_argument, 0, 'C'}, + {"force-dns", no_argument, 0, 'F'}, + {"load-seeds", required_argument, 0, 'G'}, + {"save-seeds", required_argument, 0, 'H'}, + {"print-plugins", no_argument, 0, 'N'}, + {"prime-dns", no_argument, 0, 'P'}, + {"time", no_argument, 0, 'Q'}, + {"debug-rules", no_argument, 0, 'S'}, + {"re-level", required_argument, 0, 'T'}, + {"watchdog", no_argument, 0, 'W'}, + {"print-id", required_argument, 0, 'I'}, + {"status-file", required_argument, 0, 'U'}, + +#ifdef DEBUG + {"debug", required_argument, 0, 'B'}, +#endif +#ifdef USE_IDMEF + {"idmef-dtd", required_argument, 0, 'n'}, +#endif +#ifdef USE_PERFTOOLS_DEBUG + {"mem-leaks", no_argument, 0, 'm'}, + {"mem-profile", no_argument, 0, 'M'}, +#endif + + {"pseudo-realtime", optional_argument, 0, 'E'}, + {"jobs", optional_argument, 0, 'j'}, + + {0, 0, 0, 0}, + }; + + char opts[256]; + safe_strncpy(opts, "B:e:f:G:H:I:i:j::n:p:r:s:T:t:U:w:X:CFNPQSWabdhv", + sizeof(opts)); + +#ifdef USE_PERFTOOLS_DEBUG + strncat(opts, "mM", 2); +#endif + + int op; + int long_optsind; + opterr = 0; + + while ( (op = getopt_long(argc, argv, opts, long_opts, &long_optsind)) != EOF ) + switch ( op ) { + case 'a': + rval.parse_only = true; + break; + case 'b': + rval.bare_mode = true; + break; + case 'd': + rval.debug_scripts = true; + break; + case 'e': + if ( optarg[0] == 0 ) + // Cheating a bit, but allows checking for an empty string + // to determine whether -e was used or not. + rval.script_code_to_exec = " "; + else + rval.script_code_to_exec = optarg; + break; + case 'f': + rval.pcap_filter = optarg; + break; + case 'h': + rval.print_usage = true; + break; + case 'i': + if ( !
rval.pcap_files.empty() ) + { + fprintf(stderr, "Using -i is not allowed when reading pcap files\n"); + exit(1); + } + rval.interfaces.emplace_back(optarg); + break; + case 'j': + rval.supervised_workers = 1; + if ( optarg ) + rval.supervised_workers = atoi(optarg); + break; + case 'p': + rval.script_prefixes.emplace_back(optarg); + break; + case 'r': + if ( ! rval.interfaces.empty() ) + { + fprintf(stderr, "Using -r is not allowed when reading a live interface\n"); + exit(1); + } + rval.pcap_files.emplace_back(optarg); + break; + case 's': + rval.signature_files.emplace_back(optarg); + break; + case 't': + rval.debug_script_tracing_file = optarg; + break; + case 'v': + rval.print_version = true; + break; + case 'w': + rval.pcap_output_file = optarg; + break; + case 'B': + rval.debug_log_streams = optarg; + break; + case 'C': + rval.ignore_checksums = true; + break; + case 'E': + rval.pseudo_realtime = 1.0; + if ( optarg ) + rval.pseudo_realtime = atof(optarg); + break; + case 'F': + if ( rval.dns_mode != DNS_DEFAULT ) + usage(argv[0], 1); + rval.dns_mode = DNS_FORCE; + break; + case 'G': + rval.random_seed_input_file = optarg; + break; + case 'H': + rval.random_seed_output_file = optarg; + break; + case 'I': + rval.identifier_to_print = optarg; + break; + case 'N': + ++rval.print_plugins; + break; + case 'P': + if ( rval.dns_mode != DNS_DEFAULT ) + usage(argv[0], 1); + rval.dns_mode = DNS_PRIME; + break; + case 'Q': + rval.print_execution_time = true; + break; + case 'S': + rval.print_signature_debug_info = true; + break; + case 'T': + rval.signature_re_level = atoi(optarg); + break; + case 'U': + rval.process_status_file = optarg; + break; + case 'W': + rval.use_watchdog = true; + break; + case 'X': + rval.zeekygen_config_file = optarg; + break; + +#ifdef USE_PERFTOOLS_DEBUG + case 'm': + rval.perftools_check_leaks = 1; + break; + case 'M': + rval.perftools_profile = 1; + break; +#endif + +#ifdef USE_IDMEF + case 'n': + rval.libidmef_dtd_file = optarg; + break;
+#endif + + case 0: + // This happens for long options that don't have + // a short-option equivalent. + break; + + case '?': + default: + usage(argv[0], 1); + break; + } + + // Process remaining arguments. X=Y arguments indicate script + // variable/parameter assignments. X::Y arguments indicate plugins to + // activate/query. The remainder are treated as scripts to load. + while ( optind < argc ) + { + if ( strchr(argv[optind], '=') ) + rval.script_options_to_set.emplace_back(argv[optind++]); + else if ( strstr(argv[optind], "::") ) + rval.plugins_to_load.emplace(argv[optind++]); + else + rval.scripts_to_load.emplace_back(argv[optind++]); + } + + return rval; + } + bool show_plugins(int level) { plugin::Manager::plugin_list plugins = plugin_mgr->ActivePlugins(); @@ -349,6 +612,7 @@ void terminate_bro() delete analyzer_mgr; delete file_mgr; // broker_mgr is deleted via iosource_mgr + // supervisor is deleted via iosource_mgr delete iosource_mgr; delete log_mgr; delete reporter; @@ -397,271 +661,133 @@ static void bro_new_handler() out_of_memory("new"); } +static std::vector<std::string> get_script_signature_files() + { + std::vector<std::string> rval; + + // Parse rule files defined on the script level.
+ char* script_signature_files = + copy_string(internal_val("signature_files")->AsString()->CheckString()); + + char* tmp = script_signature_files; + char* s; + while ( (s = strsep(&tmp, " \t")) ) + if ( *s ) + rval.emplace_back(s); + + delete [] script_signature_files; + return rval; + } + int main(int argc, char** argv) { - std::set_new_handler(bro_new_handler); - - double time_start = current_time(true); - - brofiler.ReadStats(); - bro_argc = argc; bro_argv = new char* [argc]; for ( int i = 0; i < argc; i++ ) bro_argv[i] = copy_string(argv[i]); - name_list interfaces; - name_list read_files; - name_list rule_files; - char* id_name = 0; + auto options = parse_cmdline(argc, argv); - char* seed_load_file = zeekenv("ZEEK_SEED_FILE"); - char* seed_save_file = 0; - char* user_pcap_filter = 0; - char* debug_streams = 0; - int parse_only = false; - int bare_mode = false; - int do_watchdog = 0; - int override_ignore_checksums = 0; - int rule_debug = 0; - int RE_level = 4; - int print_plugins = 0; - int time_bro = 0; + if ( options.print_usage ) + usage(argv[0], 0); - static struct option long_opts[] = { - {"parse-only", no_argument, 0, 'a'}, - {"bare-mode", no_argument, 0, 'b'}, - {"debug-policy", no_argument, 0, 'd'}, - {"exec", required_argument, 0, 'e'}, - {"filter", required_argument, 0, 'f'}, - {"help", no_argument, 0, 'h'}, - {"iface", required_argument, 0, 'i'}, - {"zeekygen", required_argument, 0, 'X'}, - {"prefix", required_argument, 0, 'p'}, - {"readfile", required_argument, 0, 'r'}, - {"rulefile", required_argument, 0, 's'}, - {"tracefile", required_argument, 0, 't'}, - {"writefile", required_argument, 0, 'w'}, - {"version", no_argument, 0, 'v'}, - {"no-checksums", no_argument, 0, 'C'}, - {"force-dns", no_argument, 0, 'F'}, - {"load-seeds", required_argument, 0, 'G'}, - {"save-seeds", required_argument, 0, 'H'}, - {"print-plugins", no_argument, 0, 'N'}, - {"prime-dns", no_argument, 0, 'P'}, - {"time", no_argument, 0, 'Q'}, - {"debug-rules", no_argument, 0, 
'S'}, - {"re-level", required_argument, 0, 'T'}, - {"watchdog", no_argument, 0, 'W'}, - {"print-id", required_argument, 0, 'I'}, - {"status-file", required_argument, 0, 'U'}, + if ( options.print_version ) + { + fprintf(stdout, "%s version %s\n", argv[0], zeek_version()); + exit(0); + } -#ifdef DEBUG - {"debug", required_argument, 0, 'B'}, -#endif -#ifdef USE_IDMEF - {"idmef-dtd", required_argument, 0, 'n'}, -#endif -#ifdef USE_PERFTOOLS_DEBUG - {"mem-leaks", no_argument, 0, 'm'}, - {"mem-profile", no_argument, 0, 'M'}, -#endif + bool use_supervisor = options.supervised_workers > 0; + pid_t stem_pid = 0; + std::unique_ptr supervisor_pipe; - {"pseudo-realtime", optional_argument, 0, 'E'}, + if ( use_supervisor ) + { + supervisor_pipe.reset(new bro::Pipe{FD_CLOEXEC, FD_CLOEXEC, + O_NONBLOCK, O_NONBLOCK}); + stem_pid = fork(); - {0, 0, 0, 0}, - }; + if ( stem_pid == -1 ) + { + fprintf(stderr, "failed to fork Zeek supervisor stem process: %s\n", + strerror(errno)); + exit(1); + } - enum DNS_MgrMode dns_type = DNS_DEFAULT; + if ( stem_pid == 0 ) + { + zeek::set_thread_name("zeek-stem"); + // TODO: changing the process group here so that SIGINT to the + // supervisor doesn't also get passed to the children. i.e. supervisor + // should be in charge of initiating orderly shutdown. But calling + // just setpgid() like this is technically a race-condition -- need + // to do more work of blocking SIGINT before fork(), unblocking after, + // then also calling setpgid() from parent. And just not doing that + // until more is known whether that's the right SIGINT behavior in + // the first place. + auto res = setpgid(0, 0); - dns_type = bro_dns_fake() ? 
DNS_FAKE : DNS_DEFAULT; + if ( res == -1 ) + fprintf(stderr, "failed to set stem process group: %s\n", + strerror(errno)); + + for ( ; ; ) + { + // TODO: make a proper I/O loop w/ message processing via pipe + // TODO: better way to detect loss of parent than polling + + if ( getppid() == 1 ) + exit(0); + + sleep(1); + } + } + } + + std::set_new_handler(bro_new_handler); + + double time_start = current_time(true); + + brofiler.ReadStats(); + + auto dns_type = options.dns_mode; + + if ( dns_type == DNS_DEFAULT && zeek_dns_fake() ) + dns_type = DNS_FAKE; RETSIGTYPE (*oldhandler)(int); - prog = argv[0]; + zeek_script_prefixes = options.script_prefixes; + auto zeek_prefixes = zeekenv("ZEEK_PREFIXES"); - prefixes.push_back(strdup("")); // "" = "no prefix" + if ( zeek_prefixes ) + tokenize_string(zeek_prefixes, ":", &zeek_script_prefixes); - char* p = zeekenv("ZEEK_PREFIXES"); - - if ( p ) - add_to_name_list(p, ':', prefixes); - - string zeekygen_config; - -#ifdef USE_IDMEF - string libidmef_dtd_path = "idmef-message.dtd"; -#endif - - extern char* optarg; - extern int optind, opterr; - - int long_optsind; - opterr = 0; - - char opts[256]; - safe_strncpy(opts, "B:e:f:G:H:I:i:n:p:r:s:T:t:U:w:X:CFNPQSWabdhv", - sizeof(opts)); + pseudo_realtime = options.pseudo_realtime; #ifdef USE_PERFTOOLS_DEBUG - strncat(opts, "mM", 2); + perftools_leaks = options.perftools_check_leaks; + perftools_profile = options.perftools_profile; #endif - int op; - while ( (op = getopt_long(argc, argv, opts, long_opts, &long_optsind)) != EOF ) - switch ( op ) { - case 'a': - parse_only = true; - break; - - case 'b': - bare_mode = true; - break; - - case 'd': - fprintf(stderr, "Policy file debugging ON.\n"); - g_policy_debug = true; - break; - - case 'e': - command_line_policy = optarg; - break; - - case 'f': - user_pcap_filter = optarg; - break; - - case 'h': - usage(0); - break; - - case 'i': - interfaces.push_back(optarg); - break; - - case 'p': - prefixes.push_back(optarg); - break; - - case 'r': 
- read_files.push_back(optarg); - break; - - case 's': - rule_files.push_back(optarg); - break; - - case 't': - g_trace_state.SetTraceFile(optarg); - g_trace_state.TraceOn(); - break; - - case 'v': - fprintf(stdout, "%s version %s\n", prog, zeek_version()); - exit(0); - break; - - case 'w': - writefile = optarg; - break; - - case 'B': - debug_streams = optarg; - break; - - case 'C': - override_ignore_checksums = 1; - break; - - case 'E': - pseudo_realtime = 1.0; - if ( optarg ) - pseudo_realtime = atof(optarg); - break; - - case 'F': - if ( dns_type != DNS_DEFAULT ) - usage(1); - dns_type = DNS_FORCE; - break; - - case 'G': - seed_load_file = optarg; - break; - - case 'H': - seed_save_file = optarg; - break; - - case 'I': - id_name = optarg; - break; - - case 'N': - ++print_plugins; - break; - - case 'P': - if ( dns_type != DNS_DEFAULT ) - usage(1); - dns_type = DNS_PRIME; - break; - - case 'Q': - time_bro = 1; - break; - - case 'S': - rule_debug = 1; - break; - - case 'T': - RE_level = atoi(optarg); - break; - - case 'U': - proc_status_file = optarg; - break; - - case 'W': - do_watchdog = 1; - break; - - case 'X': - zeekygen_config = optarg; - break; - -#ifdef USE_PERFTOOLS_DEBUG - case 'm': - perftools_leaks = 1; - break; - - case 'M': - perftools_profile = 1; - break; -#endif - -#ifdef USE_IDMEF - case 'n': - fprintf(stderr, "Using IDMEF XML DTD from %s\n", optarg); - libidmef_dtd_path = optarg; - break; -#endif - - case 0: - // This happens for long options that don't have - // a short-option equivalent. - break; - - case '?': - default: - usage(1); - break; + if ( options.debug_scripts ) + { + g_policy_debug = options.debug_scripts; + fprintf(stderr, "Zeek script debugging ON.\n"); } - if ( interfaces.length() > 0 && read_files.length() > 0 ) - usage(1); + if ( ! options.script_code_to_exec.empty() ) + command_line_policy = options.script_code_to_exec.data(); + + if ( ! 
options.debug_script_tracing_file.empty() ) + { + g_trace_state.SetTraceFile(options.debug_script_tracing_file.data()); + g_trace_state.TraceOn(); + } + + if ( ! options.process_status_file.empty() ) + proc_status_file = options.process_status_file.data(); atexit(atexit_handler); set_processing_status("INITIALIZING", "main"); @@ -675,14 +801,44 @@ int main(int argc, char** argv) plugin_mgr = new plugin::Manager(); #ifdef DEBUG - if ( debug_streams ) + if ( ! options.debug_log_streams.empty() ) { - debug_logger.EnableStreams(debug_streams); - debug_logger.OpenDebugLog("debug"); + debug_logger.EnableStreams(options.debug_log_streams.data()); + const char* debug_log_name = nullptr; + + if ( ! getenv("ZEEK_DEBUG_LOG_STDERR") ) + { + if ( use_supervisor ) + debug_log_name = "debug-supervisor"; + else + debug_log_name = "debug"; + } + + debug_logger.OpenDebugLog(debug_log_name); } #endif - init_random_seed((seed_load_file && *seed_load_file ? seed_load_file : 0) , seed_save_file); + if ( use_supervisor ) + { + zeek::Supervisor::Config cfg = {}; + cfg.pcaps = options.pcap_files; + cfg.num_workers = options.supervised_workers; + zeek::supervisor = new zeek::Supervisor(std::move(cfg), + std::move(supervisor_pipe), + stem_pid); + + // TODO: what options actually apply to the supervisor ? + options.pcap_files = {}; + options.interfaces = {}; + } + + const char* seed_load_file = zeekenv("ZEEK_SEED_FILE"); + + if ( ! options.random_seed_input_file.empty() ) + seed_load_file = options.random_seed_input_file.data(); + + init_random_seed((seed_load_file && *seed_load_file ? seed_load_file : 0), + options.random_seed_output_file.empty() ? 
0 : options.random_seed_output_file.data()); // DEBUG_MSG("HMAC key: %s\n", md5_digest_print(shared_hmac_md5_key)); init_hash_function(); @@ -701,9 +857,9 @@ int main(int argc, char** argv) reporter->Error("Failed to initialize sqlite3: %s", sqlite3_errstr(r)); #ifdef USE_IDMEF - char* libidmef_dtd_path_cstr = new char[libidmef_dtd_path.length() + 1]; - safe_strncpy(libidmef_dtd_path_cstr, libidmef_dtd_path.c_str(), - libidmef_dtd_path.length()); + char* libidmef_dtd_path_cstr = new char[options.libidmef_dtd_file.size() + 1]; + safe_strncpy(libidmef_dtd_path_cstr, options.libidmef_dtd_file.data(), + options.libidmef_dtd_file.size()); globalsInit(libidmef_dtd_path_cstr); // Init LIBIDMEF globals createCurrentDoc("1.0"); // Set a global XML document #endif @@ -711,34 +867,34 @@ int main(int argc, char** argv) timer_mgr = new PQ_TimerMgr(""); // timer_mgr = new CQ_TimerMgr(); - zeekygen_mgr = new zeekygen::Manager(zeekygen_config, bro_argv[0]); + zeekygen_mgr = new zeekygen::Manager(options.zeekygen_config_file, + bro_argv[0]); add_essential_input_file("base/init-bare.zeek"); add_essential_input_file("base/init-frameworks-and-bifs.zeek"); - if ( ! bare_mode ) + if ( ! options.bare_mode ) add_input_file("base/init-default.zeek"); plugin_mgr->SearchDynamicPlugins(bro_plugin_path()); - if ( optind == argc && - read_files.length() == 0 && - interfaces.length() == 0 && - ! id_name && ! command_line_policy && ! print_plugins ) + if ( options.plugins_to_load.empty() && options.scripts_to_load.empty() && + options.script_options_to_set.empty() && + options.pcap_files.size() == 0 && + options.interfaces.size() == 0 && + options.identifier_to_print.empty() && + ! command_line_policy && ! options.print_plugins && + ! use_supervisor ) add_input_file("-"); - // Process remaining arguments. X=Y arguments indicate script - // variable/parameter assignments. X::Y arguments indicate plugins to - // activate/query. The remainder are treated as scripts to load. 
- while ( optind < argc ) - { - if ( strchr(argv[optind], '=') ) - params.push_back(argv[optind++]); - else if ( strstr(argv[optind], "::") ) - requested_plugins.insert(argv[optind++]); - else - add_input_file(argv[optind++]); - } + for ( const auto& script_option : options.script_options_to_set ) + params.push_back(script_option); + + for ( const auto& plugin : options.plugins_to_load ) + requested_plugins.insert(plugin); + + for ( const auto& script : options.scripts_to_load ) + add_input_file(script.data()); push_scope(nullptr, nullptr); @@ -755,7 +911,7 @@ int main(int argc, char** argv) log_mgr = new logging::Manager(); input_mgr = new input::Manager(); file_mgr = new file_analysis::Manager(); - broker_mgr = new bro_broker::Manager(read_files.length() > 0); + broker_mgr = new bro_broker::Manager(! options.pcap_files.empty()); plugin_mgr->InitPreScript(); analyzer_mgr->InitPreScript(); @@ -774,7 +930,7 @@ int main(int argc, char** argv) if ( missing_plugin ) reporter->FatalError("Failed to activate requested dynamic plugin(s)."); - plugin_mgr->ActivateDynamicPlugins(! bare_mode); + plugin_mgr->ActivateDynamicPlugins(! options.bare_mode); init_event_handlers(); @@ -830,9 +986,9 @@ int main(int argc, char** argv) zeekygen_mgr->InitPostScript(); broker_mgr->InitPostScript(); - if ( print_plugins ) + if ( options.print_plugins ) { - bool success = show_plugins(print_plugins); + bool success = show_plugins(options.print_plugins); exit(success ? 0 : 1); } @@ -840,7 +996,7 @@ int main(int argc, char** argv) file_mgr->InitPostScript(); dns_mgr->InitPostScript(); - if ( parse_only ) + if ( options.parse_only ) { int rc = (reporter->Errors() > 0 ? 1 : 0); exit(rc); @@ -859,52 +1015,48 @@ int main(int argc, char** argv) reporter->InitOptions(); zeekygen_mgr->GenerateDocs(); - if ( user_pcap_filter ) + if ( ! options.pcap_filter.empty() ) { ID* id = global_scope()->Lookup("cmd_line_bpf_filter"); if ( ! 
id ) reporter->InternalError("global cmd_line_bpf_filter not defined"); - id->SetVal(new StringVal(user_pcap_filter)); + id->SetVal(new StringVal(options.pcap_filter)); } - // Parse rule files defined on the script level. - char* script_rule_files = - copy_string(internal_val("signature_files")->AsString()->CheckString()); + auto all_signature_files = options.signature_files; - char* tmp = script_rule_files; - char* s; - while ( (s = strsep(&tmp, " \t")) ) - if ( *s ) - rule_files.push_back(s); + // Append signature files defined in "signature_files" script option + for ( auto&& sf : get_script_signature_files() ) + all_signature_files.emplace_back(std::move(sf)); // Append signature files defined in @load-sigs - for ( size_t i = 0; i < sig_files.size(); ++i ) - rule_files.push_back(copy_string(sig_files[i].c_str())); + for ( const auto& sf : sig_files ) + all_signature_files.emplace_back(sf); - if ( rule_files.length() > 0 ) + if ( ! all_signature_files.empty() ) { - rule_matcher = new RuleMatcher(RE_level); - if ( ! rule_matcher->ReadFiles(rule_files) ) + rule_matcher = new RuleMatcher(options.signature_re_level); + if ( ! rule_matcher->ReadFiles(all_signature_files) ) { delete dns_mgr; exit(1); } - if ( rule_debug ) + if ( options.print_signature_debug_info ) rule_matcher->PrintDebug(); file_mgr->InitMagic(); } - delete [] script_rule_files; - if ( g_policy_debug ) // ### Add support for debug command file. 
dbg_init_debugger(0); - if ( read_files.length() == 0 && interfaces.length() == 0 ) + auto all_interfaces = options.interfaces; + + if ( options.pcap_files.empty() && options.interfaces.empty() ) { Val* interfaces_val = internal_val("interfaces"); if ( interfaces_val ) @@ -913,14 +1065,15 @@ int main(int argc, char** argv) interfaces_val->AsString()->Render(); if ( interfaces_str[0] != '\0' ) - add_to_name_list(interfaces_str, ' ', interfaces); + tokenize_string(interfaces_str, " ", &all_interfaces); delete [] interfaces_str; } } if ( dns_type != DNS_PRIME ) - net_init(interfaces, read_files, writefile, do_watchdog); + net_init(all_interfaces, options.pcap_files, + options.pcap_output_file, options.use_watchdog); net_done = internal_handler("net_done"); @@ -949,11 +1102,11 @@ int main(int argc, char** argv) } // Print the ID. - if ( id_name ) + if ( ! options.identifier_to_print.empty() ) { - ID* id = global_scope()->Lookup(id_name); + ID* id = global_scope()->Lookup(options.identifier_to_print); if ( ! 
id ) - reporter->FatalError("No such ID: %s\n", id_name); + reporter->FatalError("No such ID: %s\n", options.identifier_to_print.data()); ODesc desc; desc.SetQuotes(true); @@ -1009,7 +1162,7 @@ int main(int argc, char** argv) g_frame_stack.pop_back(); } - if ( override_ignore_checksums ) + if ( options.ignore_checksums ) ignore_checksums = 1; if ( zeek_script_loaded ) @@ -1043,6 +1196,9 @@ int main(int argc, char** argv) iosource_mgr->Register(thread_mgr, true); + if ( zeek::supervisor ) + iosource_mgr->Register(zeek::supervisor); + if ( iosource_mgr->Size() > 0 || have_pending_timers || BifConst::exit_only_after_terminate ) @@ -1067,7 +1223,7 @@ int main(int argc, char** argv) uint64_t mem_net_start_total; uint64_t mem_net_start_malloced; - if ( time_bro ) + if ( options.print_execution_time ) { get_memory_usage(&mem_net_start_total, &mem_net_start_malloced); @@ -1085,7 +1241,7 @@ int main(int argc, char** argv) uint64_t mem_net_done_total; uint64_t mem_net_done_malloced; - if ( time_bro ) + if ( options.print_execution_time ) { get_memory_usage(&mem_net_done_total, &mem_net_done_malloced); diff --git a/src/scan.l b/src/scan.l index 8a5b1c9fe8..9e0a95f56d 100644 --- a/src/scan.l +++ b/src/scan.l @@ -434,10 +434,9 @@ when return TOK_WHEN; pref = skip_whitespace(pref + 1); // Skip over '='. if ( ! append ) - while ( prefixes.length() > 1 ) // don't delete "" prefix - delete prefixes.remove_nth(1); + zeek_script_prefixes = { "" }; // don't delete the "" prefix - add_to_name_list(pref, ':', prefixes); + tokenize_string(pref, ":", &zeek_script_prefixes); } @if return TOK_ATIF; @@ -942,14 +941,14 @@ int yywrap() it->prefixes_checked = true; // Prefixes are pushed onto a stack, so iterate backwards. - for ( int i = prefixes.length() - 1; i >= 0; --i ) + for ( int i = zeek_script_prefixes.size() - 1; i >= 0; --i ) { // Don't look at empty prefixes. - if ( ! prefixes[i][0] ) + if ( ! 
zeek_script_prefixes[i][0] ) continue; string canon = without_bropath_component(it->name); - string flat = flatten_script_name(canon, prefixes[i]); + string flat = flatten_script_name(canon, zeek_script_prefixes[i]); string path = find_relative_script_file(flat); if ( ! path.empty() ) diff --git a/src/threading/BasicThread.cc b/src/threading/BasicThread.cc index 67434957e5..de0170a076 100644 --- a/src/threading/BasicThread.cc +++ b/src/threading/BasicThread.cc @@ -1,18 +1,11 @@ #include +#include #include "zeek-config.h" #include "BasicThread.h" #include "Manager.h" -#include "pthread.h" - -#ifdef HAVE_LINUX -#include -#endif - -#ifdef __FreeBSD__ -#include -#endif +#include "util.h" using namespace threading; @@ -54,18 +47,7 @@ void BasicThread::SetName(const char* arg_name) void BasicThread::SetOSName(const char* arg_name) { static_assert(std::is_same::value, "libstdc++ doesn't use pthread_t"); - -#ifdef HAVE_LINUX - prctl(PR_SET_NAME, arg_name, 0, 0, 0); -#endif - -#ifdef __APPLE__ - pthread_setname_np(arg_name); -#endif - -#ifdef __FreeBSD__ - pthread_set_name_np(thread.native_handle(), arg_name); -#endif + zeek::set_thread_name(arg_name, thread.native_handle()); } const char* BasicThread::Fmt(const char* format, ...) diff --git a/src/util.cc b/src/util.cc index 461835964e..d4cfb42f9b 100644 --- a/src/util.cc +++ b/src/util.cc @@ -1003,7 +1003,7 @@ string bro_prefixes() { string rval; - for ( const auto& prefix : prefixes ) + for ( const auto& prefix : zeek_script_prefixes ) { if ( ! 
rval.empty() ) rval.append(":"); @@ -1936,3 +1936,18 @@ string json_escape_utf8(const string& val) return result; } + +void zeek::set_thread_name(const char* name, pthread_t tid) + { +#ifdef HAVE_LINUX + prctl(PR_SET_NAME, name, 0, 0, 0); +#endif + +#ifdef __APPLE__ + pthread_setname_np(name); +#endif + +#ifdef __FreeBSD__ + pthread_set_name_np(tid, name); +#endif + } diff --git a/src/util.h b/src/util.h index f25605340e..07d33bbef7 100644 --- a/src/util.h +++ b/src/util.h @@ -60,6 +60,15 @@ extern HeapLeakChecker* heap_checker; #endif #include +#include + +#ifdef HAVE_LINUX +#include +#endif + +#ifdef __FreeBSD__ +#include +#endif ZEEK_DEPRECATED("Remove in v4.1. Use uint64_t instead.") typedef uint64_t uint64; @@ -579,3 +588,14 @@ std::unique_ptr build_unique (Args&&... args) { * @return the escaped string */ std::string json_escape_utf8(const std::string& val); + +namespace zeek { +/** + * Set the process/thread name. May not be supported on all OSs. + * @param name new name for the process/thread. OS limitations typically + * truncate the name to 15 bytes maximum. 
+ * @param tid handle of thread whose name shall change + */ +void set_thread_name(const char* name, pthread_t tid = pthread_self()); + +} // namespace zeek From 52f7647f25b394ea23353df65e93f92f36f586b2 Mon Sep 17 00:00:00 2001 From: Jon Siwek Date: Fri, 4 Oct 2019 13:25:01 -0700 Subject: [PATCH 02/76] Add supervisor stem process auto-revival --- src/Pipe.cc | 92 +++++++++++++++++++----- src/Pipe.h | 18 ++++- src/Supervisor.cc | 175 +++++++++++++++++++++++++++++++++------------- src/Supervisor.h | 5 ++ src/main.cc | 88 ++++++++++++++++------- 5 files changed, 285 insertions(+), 93 deletions(-) diff --git a/src/Pipe.cc b/src/Pipe.cc index 7276571402..97ccd7f3c5 100644 --- a/src/Pipe.cc +++ b/src/Pipe.cc @@ -20,21 +20,52 @@ static void pipe_fail(int eno) fprintf(stderr, "Pipe failure: %s", tmp); } -static void set_flags(int fd, int flags) +static int set_flags(int fd, int flags) { + auto rval = fcntl(fd, F_GETFD); + if ( flags ) - if ( fcntl(fd, F_SETFD, fcntl(fd, F_GETFD) | flags) == -1 ) + { + rval |= flags; + + if ( fcntl(fd, F_SETFD, rval) == -1 ) pipe_fail(errno); + } + + return rval; } -static void set_status_flags(int fd, int flags) +static int unset_flags(int fd, int flags) { + auto rval = fcntl(fd, F_GETFD); + if ( flags ) - if ( fcntl(fd, F_SETFL, fcntl(fd, F_GETFL) | flags) == -1 ) + { + rval &= ~flags; + + if ( fcntl(fd, F_SETFD, rval) == -1 ) pipe_fail(errno); + } + + return rval; } -static int dup_or_fail(int fd, int flags) +static int set_status_flags(int fd, int flags) + { + auto rval = fcntl(fd, F_GETFL); + + if ( flags ) + { + rval |= flags; + + if ( fcntl(fd, F_SETFL, rval) == -1 ) + pipe_fail(errno); + } + + return rval; + } + +static int dup_or_fail(int fd, int flags, int status_flags) { int rval = dup(fd); @@ -42,22 +73,41 @@ static int dup_or_fail(int fd, int flags) pipe_fail(errno); set_flags(fd, flags); + set_status_flags(fd, status_flags); return rval; } -Pipe::Pipe(int flags0, int flags1, int status_flags0, int status_flags1) 
+Pipe::Pipe(int flags0, int flags1, int status_flags0, int status_flags1, + int* arg_fds) { - // pipe2 can set flags atomically, but not yet available everywhere. - if ( ::pipe(fds) ) - pipe_fail(errno); + if ( arg_fds ) + { + fds[0] = arg_fds[0]; + fds[1] = arg_fds[1]; + } + else + { + // pipe2 can set flags atomically, but not yet available everywhere. + if ( ::pipe(fds) ) + pipe_fail(errno); + } - flags[0] = flags0; - flags[1] = flags1; + flags[0] = set_flags(fds[0], flags[0]); + flags[1] = set_flags(fds[1], flags[1]); + status_flags[0] = set_status_flags(fds[0], status_flags0); + status_flags[1] = set_status_flags(fds[1], status_flags1); + } - set_flags(fds[0], flags[0]); - set_flags(fds[1], flags[1]); - set_status_flags(fds[0], status_flags0); - set_status_flags(fds[1], status_flags1); +void Pipe::SetFlags(int arg_flags) + { + flags[0] = set_flags(fds[0], arg_flags); + flags[1] = set_flags(fds[1], arg_flags); + } + +void Pipe::UnsetFlags(int arg_flags) + { + flags[0] = unset_flags(fds[0], arg_flags); + flags[1] = unset_flags(fds[1], arg_flags); } Pipe::~Pipe() @@ -68,10 +118,12 @@ Pipe::~Pipe() Pipe::Pipe(const Pipe& other) { - fds[0] = dup_or_fail(other.fds[0], other.flags[0]); - fds[1] = dup_or_fail(other.fds[1], other.flags[1]); + fds[0] = dup_or_fail(other.fds[0], other.flags[0], other.status_flags[0]); + fds[1] = dup_or_fail(other.fds[1], other.flags[1], other.status_flags[1]); flags[0] = other.flags[0]; flags[1] = other.flags[1]; + status_flags[0] = other.status_flags[0]; + status_flags[1] = other.status_flags[1]; } Pipe& Pipe::operator=(const Pipe& other) @@ -81,9 +133,11 @@ Pipe& Pipe::operator=(const Pipe& other) close(fds[0]); close(fds[1]); - fds[0] = dup_or_fail(other.fds[0], other.flags[0]); - fds[1] = dup_or_fail(other.fds[1], other.flags[1]); + fds[0] = dup_or_fail(other.fds[0], other.flags[0], other.status_flags[0]); + fds[1] = dup_or_fail(other.fds[1], other.flags[1], other.status_flags[1]); flags[0] = other.flags[0]; flags[1] = 
other.flags[1]; + status_flags[0] = other.status_flags[0]; + status_flags[1] = other.status_flags[1]; return *this; } diff --git a/src/Pipe.h b/src/Pipe.h index eed32bac01..ad09d58b3f 100644 --- a/src/Pipe.h +++ b/src/Pipe.h @@ -13,9 +13,12 @@ public: * @param flags1 file descriptor flags to set on write end of pipe. * @param status_flags0 descriptor status flags to set on read end of pipe. * @param status_flags1 descriptor status flags to set on write end of pipe. + * @param fds may be supplied to use existing file descriptors rather + * than create ones from a new pipe. Should point to memory containing + * two consecutive file descriptors, the "read" one and then the "write" one. */ explicit Pipe(int flags0 = 0, int flags1 = 0, int status_flags0 = 0, - int status_flags1 = 0); + int status_flags1 = 0, int* fds = nullptr); /** * Close the pair of file descriptors owned by the object. @@ -45,9 +48,22 @@ public: int WriteFD() const { return fds[1]; } + /** + * Sets the given file descriptor flags for both the read and write end + * of the pipe. + */ + void SetFlags(int flags); + + /** + * Unsets the given file descriptor flags for both the read and write end + * of the pipe. + */ + void UnsetFlags(int flags); + private: int fds[2]; int flags[2]; + int status_flags[2]; }; } // namespace bro diff --git a/src/Supervisor.cc b/src/Supervisor.cc index 2d9cc64e18..f63a8e350e 100644 --- a/src/Supervisor.cc +++ b/src/Supervisor.cc @@ -1,6 +1,9 @@ +#include #include +#include #include +#include #include "Supervisor.h" #include "Reporter.h" @@ -30,11 +33,6 @@ zeek::Supervisor::Supervisor(zeek::Supervisor::Config cfg, SetIdle(true); } -void zeek::Supervisor::ObserveChildSignal() - { - signal_flare.Fire(); - } - zeek::Supervisor::~Supervisor() { if ( ! stem_pid ) @@ -68,6 +66,94 @@ zeek::Supervisor::~Supervisor() } } +void zeek::Supervisor::ObserveChildSignal() + { + signal_flare.Fire(); + } + +void zeek::Supervisor::HandleChildSignal() + { + if ( ! 
stem_pid ) + return; + + auto child_signals = signal_flare.Extinguish(); + + if ( ! child_signals ) + return; + + DBG_LOG(DBG_SUPERVISOR, "handle %d child signals, wait for stem pid %d", + child_signals, stem_pid); + + int status; + auto res = waitpid(stem_pid, &status, WNOHANG); + + if ( res == 0 ) + { + DBG_LOG(DBG_SUPERVISOR, "false alarm, stem process still lives"); + } + else if ( res == -1 ) + { + char tmp[256]; + bro_strerror_r(errno, tmp, sizeof(tmp)); + reporter->Error("Supervisor failed to get exit status" + " of stem process: %s", tmp); + } + else if ( WIFEXITED(status) ) + { + DBG_LOG(DBG_SUPERVISOR, "stem process exited with status %d", + WEXITSTATUS(status)); + stem_pid = 0; + } + else if ( WIFSIGNALED(status) ) + { + DBG_LOG(DBG_SUPERVISOR, "stem process terminated by signal %d", + WTERMSIG(status)); + stem_pid = 0; + } + else + reporter->Error("Supervisor failed to get exit status" + " of stem process for unknown reason"); + + if ( ! stem_pid ) + { + // Revive the Stem process + stem_pid = fork(); + + if ( stem_pid == -1 ) + { + char tmp[256]; + bro_strerror_r(errno, tmp, sizeof(tmp)); + reporter->Error("failed to fork Zeek supervisor stem process: %s\n", tmp); + signal_flare.Fire(); + // Sleep to avoid spinning too fast in a revival-fail loop. 
+ sleep(1); + } + else if ( stem_pid == 0 ) + { + char stem_env[256]; + safe_snprintf(stem_env, sizeof(stem_env), "ZEEK_STEM=%d,%d", + stem_pipe->ReadFD(), stem_pipe->WriteFD()); + char* env[] = { stem_env, (char*)0 }; + stem_pipe->UnsetFlags(FD_CLOEXEC); + auto res = execle(config.zeek_exe_path.data(), + config.zeek_exe_path.data(), + (char*)0, env); + + char tmp[256]; + bro_strerror_r(errno, tmp, sizeof(tmp)); + fprintf(stderr, "failed to exec Zeek supervisor stem process: %s\n", tmp); + exit(1); + } + else + { + DBG_LOG(DBG_SUPERVISOR, "stem process revived, new pid: %d", stem_pid); + } + } + + // TODO: Stem process needs a way to inform Supervisor not to revive + } + + void zeek::Supervisor::GetFds(iosource::FD_Set* read, iosource::FD_Set* write, iosource::FD_Set* except) { @@ -85,51 +171,44 @@ double zeek::Supervisor::NextTimestamp(double* local_network_time) void zeek::Supervisor::Process() { - auto child_signals = signal_flare.Extinguish(); + HandleChildSignal(); - DBG_LOG(DBG_SUPERVISOR, "process: child_signals %d, stem_pid %d", - child_signals, stem_pid); + char buf[256]; + int bytes_read = read(stem_pipe->ReadFD(), buf, 256); - if ( child_signals && stem_pid ) + if ( bytes_read > 0 ) { - DBG_LOG(DBG_SUPERVISOR, "handle child signal, wait for %d", stem_pid); - int status; - auto res = waitpid(stem_pid, &status, WNOHANG); - - if ( res == 0 ) - { - DBG_LOG(DBG_SUPERVISOR, "false alarm, stem process still lives"); - } - else if ( res == -1 ) - { - char tmp[256]; - bro_strerror_r(errno, tmp, sizeof(tmp)); - reporter->Error("Supervisor failed to get exit status" - " of stem process: %s", tmp); - } - else if ( WIFEXITED(status) ) - { - DBG_LOG(DBG_SUPERVISOR, "stem process exited with status %d", - WEXITSTATUS(status)); - stem_pid = 0; - } - else if ( WIFSIGNALED(status) ) - { - DBG_LOG(DBG_SUPERVISOR, "stem process terminated by signal %d", - WTERMSIG(status)); - stem_pid = 0; - } - else - { - reporter->Error("Supervisor failed to get exit status" - " of stem 
process for unknown reason"); - } - - // TODO: add proper handling of stem process exiting - // In wait cases is it ok for the stem process to terminate and - // in what cases do we need to automatically re-recreate it ? - // And how do we re-create it? It would be too late to fork() again - // because we've potentially already changed so much global state by the - // time we get there, so guess we exec() and start over completely ?. + DBG_LOG(DBG_SUPERVISOR, "read msg from Stem: %.*s", bytes_read, buf); + } + } + +void zeek::Supervisor::RunStem(std::unique_ptr pipe) + { + zeek::set_thread_name("zeek-stem"); + // TODO: changing the process group here so that SIGINT to the + // supervisor doesn't also get passed to the children. i.e. supervisor + // should be in charge of initiating orderly shutdown. But calling + // just setpgid() like this is technically a race-condition -- need + // to do more work of blocking SIGINT before fork(), unblocking after, + // then also calling setpgid() from parent. And just not doing that + // until more is known whether that's the right SIGINT behavior in + // the first place. 
+ auto res = setpgid(0, 0); + + if ( res == -1 ) + fprintf(stderr, "failed to set stem process group: %s\n", + strerror(errno)); + + for ( ; ; ) + { + // TODO: make a proper I/O loop w/ message processing via pipe + // TODO: better way to detect loss of parent than polling + + if ( getppid() == 1 ) + exit(0); + + sleep(5); + printf("Stem wakeup\n"); + write(pipe->WriteFD(), "hi", 2); } } diff --git a/src/Supervisor.h b/src/Supervisor.h index aa647c5aed..7d553d3b68 100644 --- a/src/Supervisor.h +++ b/src/Supervisor.h @@ -16,9 +16,12 @@ namespace zeek { class Supervisor : public iosource::IOSource { public: + static void RunStem(std::unique_ptr pipe); + struct Config { int num_workers = 1; std::vector pcaps; + std::string zeek_exe_path; }; Supervisor(Config cfg, std::unique_ptr stem_pipe, pid_t stem_pid); @@ -40,6 +43,8 @@ private: void Process() override; + void HandleChildSignal(); + const char* Tag() override { return "zeek::Supervisor"; } diff --git a/src/main.cc b/src/main.cc index a76355f3a3..aeae9230fe 100644 --- a/src/main.cc +++ b/src/main.cc @@ -679,8 +679,48 @@ static std::vector get_script_signature_files() return rval; } +static std::string get_exe_path(std::string invocation) + { + if ( invocation.empty() ) + return ""; + + if ( invocation[0] == '/' ) + // Absolute path + return invocation; + + if ( invocation.find('/') != std::string::npos ) + { + // Relative path + char cwd[PATH_MAX]; + + if ( ! getcwd(cwd, sizeof(cwd)) ) + { + fprintf(stderr, "failed to get current directory: %s\n", + strerror(errno)); + exit(1); + } + + return std::string(cwd) + "/" + invocation; + } + + auto path = getenv("PATH"); + + if ( ! 
path ) + return ""; + + return find_file(invocation, path); + } + int main(int argc, char** argv) { + auto zeek_exe_path = get_exe_path(argv[0]); + + if ( zeek_exe_path.empty() ) + { + fprintf(stderr, "failed to get path to executable '%s'", argv[0]); + exit(1); + } + bro_argc = argc; bro_argv = new char* [argc]; @@ -716,33 +756,30 @@ int main(int argc, char** argv) } if ( stem_pid == 0 ) + zeek::Supervisor::RunStem(std::move(supervisor_pipe)); + } + + auto zeek_stem_env = getenv("ZEEK_STEM"); + + if ( zeek_stem_env ) + { + std::vector fd_strings; + tokenize_string(zeek_stem_env, ",", &fd_strings); + + if ( fd_strings.size() != 2 ) { - zeek::set_thread_name("zeek-stem"); - // TODO: changing the process group here so that SIGINT to the - // supervisor doesn't also get passed to the children. i.e. supervisor - // should be in charge of initiating orderly shutdown. But calling - // just setpgid() like this is technically a race-condition -- need - // to do more work of blocking SIGINT before fork(), unblocking after, - // then also calling setpgid() from parent. And just not doing that - // until more is known whether that's the right SIGINT behavior in - // the first place. 
- auto res = setpgid(0, 0); - - if ( res == -1 ) - fprintf(stderr, "failed to set stem process group: %s\n", - strerror(errno)); - - for ( ; ; ) - { - // TODO: make a proper I/O loop w/ message processing via pipe - // TODO: better way to detect loss of parent than polling - - if ( getppid() == 1 ) - exit(0); - - sleep(1); - } + fprintf(stderr, "invalid ZEEK_STEM environment variable value: '%s'\n", + zeek_stem_env); + exit(1); } + + int fds[2]; + fds[0] = std::stoi(fd_strings[0]); + fds[1] = std::stoi(fd_strings[1]); + + supervisor_pipe.reset(new bro::Pipe{FD_CLOEXEC, FD_CLOEXEC, + O_NONBLOCK, O_NONBLOCK, fds}); + zeek::Supervisor::RunStem(std::move(supervisor_pipe)); } std::set_new_handler(bro_new_handler); @@ -823,6 +860,7 @@ int main(int argc, char** argv) zeek::Supervisor::Config cfg = {}; cfg.pcaps = options.pcap_files; cfg.num_workers = options.supervised_workers; + cfg.zeek_exe_path = zeek_exe_path; zeek::supervisor = new zeek::Supervisor(std::move(cfg), std::move(supervisor_pipe), stem_pid); From e46cf884353a79252304b4b822aa58c4790f4715 Mon Sep 17 00:00:00 2001 From: Jon Siwek Date: Tue, 15 Oct 2019 12:59:45 -0700 Subject: [PATCH 03/76] Add Supervisor BIF/event API skeleton --- .../base/frameworks/supervisor/__load__.zeek | 2 + scripts/base/frameworks/supervisor/api.zeek | 35 ++++++++++ scripts/base/frameworks/supervisor/main.zeek | 70 +++++++++++++++++++ scripts/base/init-bare.zeek | 2 + scripts/base/init-frameworks-and-bifs.zeek | 1 + src/CMakeLists.txt | 1 + src/Func.cc | 3 + src/NetVar.cc | 2 + src/NetVar.h | 1 + src/Supervisor.cc | 29 ++++++++ src/Supervisor.h | 5 ++ src/supervisor.bif | 35 ++++++++++ .../core.check-unused-event-handlers/.stderr | 7 +- .../canonified_loaded_scripts.log | 8 ++- .../canonified_loaded_scripts.log | 8 ++- testing/btest/Baseline/plugins.hooks/output | 35 ++++++++-- 16 files changed, 232 insertions(+), 12 deletions(-) create mode 100644 scripts/base/frameworks/supervisor/__load__.zeek create mode 100644 
scripts/base/frameworks/supervisor/api.zeek create mode 100644 scripts/base/frameworks/supervisor/main.zeek create mode 100644 src/supervisor.bif diff --git a/scripts/base/frameworks/supervisor/__load__.zeek b/scripts/base/frameworks/supervisor/__load__.zeek new file mode 100644 index 0000000000..e3034f5f0b --- /dev/null +++ b/scripts/base/frameworks/supervisor/__load__.zeek @@ -0,0 +1,2 @@ +@load ./api +@load ./main diff --git a/scripts/base/frameworks/supervisor/api.zeek b/scripts/base/frameworks/supervisor/api.zeek new file mode 100644 index 0000000000..31480bda51 --- /dev/null +++ b/scripts/base/frameworks/supervisor/api.zeek @@ -0,0 +1,35 @@ +##! The Zeek process supervision API. +# TODO: add proper docs + +module Supervisor; + +export { + type Status: record { + # TODO: add proper status fields + n: count; + }; + + type NodeConfig: record { + # TODO: add proper config field + name: string; + }; + + global status: function(nodes: string &default="all"): Status; + global create: function(config: NodeConfig): string; + global destroy: function(nodes: string): bool; + global restart: function(nodes: string &default="all"): bool; + + global Supervisor::stop_request: event(); + + global Supervisor::status_request: event(id: count, nodes: string); + global Supervisor::status_response: event(id: count, result: Status); + + global Supervisor::create_request: event(id: count, config: NodeConfig); + global Supervisor::create_response: event(id: count, result: string); + + global Supervisor::destroy_request: event(id: count, nodes: string); + global Supervisor::destroy_response: event(id: count, result: bool); + + global Supervisor::restart_request: event(id: count, nodes: string); + global Supervisor::restart_response: event(id: count, result: bool); +} diff --git a/scripts/base/frameworks/supervisor/main.zeek b/scripts/base/frameworks/supervisor/main.zeek new file mode 100644 index 0000000000..3dc0651003 --- /dev/null +++ b/scripts/base/frameworks/supervisor/main.zeek 
@@ -0,0 +1,70 @@ +##! Implements Zeek process supervision configuration options and default +##! behavior. +# TODO: add proper docs + +@load ./api +@load base/frameworks/broker + +module Supervisor; + +export { + const topic_prefix = "zeek/supervisor" &redef; +} + +event zeek_init() &priority=10 + { + Broker::subscribe(Supervisor::topic_prefix); + } + +event Supervisor::stop_request() + { + terminate(); + } + +event Supervisor::status_request(id: count, nodes: string) + { + local res = Supervisor::status(nodes); + local topic = Supervisor::topic_prefix + "/status_response"; + Broker::publish(topic, Supervisor::status_response, id, res); + } + +event Supervisor::create_request(id: count, config: NodeConfig) + { + local res = Supervisor::create(config); + local topic = Supervisor::topic_prefix + "/create_response"; + Broker::publish(topic, Supervisor::create_response, id, res); + } + +event Supervisor::destroy_request(id: count, nodes: string) + { + local res = Supervisor::destroy(nodes); + local topic = Supervisor::topic_prefix + "/destroy_response"; + Broker::publish(topic, Supervisor::destroy_response, id, res); + } + +event Supervisor::restart_request(id: count, nodes: string) + { + local res = Supervisor::restart(nodes); + local topic = Supervisor::topic_prefix + "/restart_response"; + Broker::publish(topic, Supervisor::restart_response, id, res); + } + +function Supervisor::status(nodes: string): Status + { + return Supervisor::__status(nodes); + } + +function create(config: NodeConfig): string + { + return Supervisor::__create(config); + } + +function destroy(nodes: string): bool + { + return Supervisor::__destroy(nodes); + } + +function restart(nodes: string): bool + { + return Supervisor::__restart(nodes); + } diff --git a/scripts/base/init-bare.zeek b/scripts/base/init-bare.zeek index 82993bfb03..4667fb1fb2 100644 --- a/scripts/base/init-bare.zeek +++ b/scripts/base/init-bare.zeek @@ -1781,6 +1781,8 @@ type gtp_delete_pdp_ctx_response_elements: record { 
@load base/bif/reporter.bif @load base/bif/strings.bif @load base/bif/option.bif +@load base/frameworks/supervisor/api +@load base/bif/supervisor.bif global done_with_network = F; event net_done(t: time) { done_with_network = T; } diff --git a/scripts/base/init-frameworks-and-bifs.zeek b/scripts/base/init-frameworks-and-bifs.zeek index 19897e7ffb..a667110a87 100644 --- a/scripts/base/init-frameworks-and-bifs.zeek +++ b/scripts/base/init-frameworks-and-bifs.zeek @@ -5,6 +5,7 @@ # the separate file). @load base/frameworks/logging @load base/frameworks/broker +@load base/frameworks/supervisor @load base/frameworks/input @load base/frameworks/analyzer @load base/frameworks/files diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 2655b7f531..6ccb7c9124 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -111,6 +111,7 @@ set(BIF_SRCS strings.bif reporter.bif option.bif + supervisor.bif ) foreach (bift ${BIF_SRCS}) diff --git a/src/Func.cc b/src/Func.cc index 747029e4f8..e0debf40d5 100644 --- a/src/Func.cc +++ b/src/Func.cc @@ -717,12 +717,14 @@ void builtin_error(const char* msg, BroObj* arg) #include "reporter.bif.func_h" #include "strings.bif.func_h" #include "option.bif.func_h" +#include "supervisor.bif.func_h" #include "zeek.bif.func_def" #include "stats.bif.func_def" #include "reporter.bif.func_def" #include "strings.bif.func_def" #include "option.bif.func_def" +#include "supervisor.bif.func_def" #include "__all__.bif.cc" // Autogenerated for compiling in the bif_target() code. #include "__all__.bif.register.cc" // Autogenerated for compiling in the bif_target() code. 
@@ -750,6 +752,7 @@ void init_builtin_funcs() #include "reporter.bif.func_init" #include "strings.bif.func_init" #include "option.bif.func_init" +#include "supervisor.bif.func_init" did_builtin_init = true; } diff --git a/src/NetVar.cc b/src/NetVar.cc index 1ab99170bb..c3f8b34340 100644 --- a/src/NetVar.cc +++ b/src/NetVar.cc @@ -196,6 +196,7 @@ bro_uint_t bits_per_uid; #include "types.bif.netvar_def" #include "event.bif.netvar_def" #include "reporter.bif.netvar_def" +#include "supervisor.bif.netvar_def" void init_event_handlers() { @@ -240,6 +241,7 @@ void init_net_var() #include "const.bif.netvar_init" #include "types.bif.netvar_init" #include "reporter.bif.netvar_init" +#include "supervisor.bif.netvar_init" conn_id = internal_type("conn_id")->AsRecordType(); endpoint = internal_type("endpoint")->AsRecordType(); diff --git a/src/NetVar.h b/src/NetVar.h index 95eb99d844..a02c742859 100644 --- a/src/NetVar.h +++ b/src/NetVar.h @@ -203,3 +203,4 @@ extern void init_net_var(); #include "types.bif.netvar_h" #include "event.bif.netvar_h" #include "reporter.bif.netvar_h" +#include "supervisor.bif.netvar_h" diff --git a/src/Supervisor.cc b/src/Supervisor.cc index f63a8e350e..d6b7620abd 100644 --- a/src/Supervisor.cc +++ b/src/Supervisor.cc @@ -8,6 +8,8 @@ #include "Supervisor.h" #include "Reporter.h" #include "DebugLogger.h" +#include "Val.h" +#include "NetVar.h" #include "zeek-config.h" #include "util.h" @@ -212,3 +214,30 @@ void zeek::Supervisor::RunStem(std::unique_ptr pipe) write(pipe->WriteFD(), "hi", 2); } } + +RecordVal* zeek::Supervisor::Status(const std::string& nodes) + { + // TODO: return real status information + static auto count = 0; + auto rval = new RecordVal(BifType::Record::Supervisor::Status); + rval->Assign(0, val_mgr->GetCount(count++)); + return rval; + } + +std::string zeek::Supervisor::Create(const RecordVal* node_config) + { + // TODO: return error msg on fail, or empty on success + return ""; + } + +bool zeek::Supervisor::Destroy(const 
std::string& nodes) + { + // TODO: return true if a matching node exists + return false; + } + +bool zeek::Supervisor::Restart(const std::string& nodes) + { + // TODO: return true if a matching node exists + return false; + } diff --git a/src/Supervisor.h b/src/Supervisor.h index 7d553d3b68..0713b1f5cd 100644 --- a/src/Supervisor.h +++ b/src/Supervisor.h @@ -33,6 +33,11 @@ public: void ObserveChildSignal(); + RecordVal* Status(const std::string& nodes); + std::string Create(const RecordVal* node_config); + bool Destroy(const std::string& nodes); + bool Restart(const std::string& nodes); + private: // IOSource interface overrides: diff --git a/src/supervisor.bif b/src/supervisor.bif new file mode 100644 index 0000000000..cac895e774 --- /dev/null +++ b/src/supervisor.bif @@ -0,0 +1,35 @@ +##! The BIFs that define the Zeek supervisor control interface. + +%%{ +#include "Supervisor.h" +%%} + +module Supervisor; + +type Supervisor::Status: record; +type Supervisor::NodeConfig: record; + +function Supervisor::__status%(nodes: string%): Supervisor::Status + %{ + return zeek::supervisor->Status(nodes->CheckString()); + %} + +function Supervisor::__create%(config: Supervisor::NodeConfig%): string + %{ + auto rval = zeek::supervisor->Create(config->AsRecordVal()); + return new StringVal(rval); + %} + +function Supervisor::__destroy%(nodes: string%): bool + %{ + auto rval = zeek::supervisor->Destroy(nodes->CheckString()); + return val_mgr->GetBool(rval); + %} + +function Supervisor::__restart%(nodes: string%): bool + %{ + auto rval = zeek::supervisor->Restart(nodes->CheckString()); + return val_mgr->GetBool(rval); + %} + +# TODO: BIFs for "restart", "add", "remove" operations diff --git a/testing/btest/Baseline/core.check-unused-event-handlers/.stderr b/testing/btest/Baseline/core.check-unused-event-handlers/.stderr index 8c4e4def40..f7d99adbfe 100644 --- a/testing/btest/Baseline/core.check-unused-event-handlers/.stderr +++ 
b/testing/btest/Baseline/core.check-unused-event-handlers/.stderr @@ -1,3 +1,8 @@ -warning in , line 1: event handler never invoked: this_is_never_used warning in , line 1: event handler never invoked: InputConfig::new_value warning in , line 1: event handler never invoked: InputRaw::process_finished +warning in , line 1: event handler never invoked: Supervisor::create_request +warning in , line 1: event handler never invoked: Supervisor::destroy_request +warning in , line 1: event handler never invoked: Supervisor::restart_request +warning in , line 1: event handler never invoked: Supervisor::status_request +warning in , line 1: event handler never invoked: Supervisor::stop_request +warning in , line 1: event handler never invoked: this_is_never_used diff --git a/testing/btest/Baseline/coverage.bare-load-baseline/canonified_loaded_scripts.log b/testing/btest/Baseline/coverage.bare-load-baseline/canonified_loaded_scripts.log index 072b83ffe0..76114aac6f 100644 --- a/testing/btest/Baseline/coverage.bare-load-baseline/canonified_loaded_scripts.log +++ b/testing/btest/Baseline/coverage.bare-load-baseline/canonified_loaded_scripts.log @@ -3,7 +3,7 @@ #empty_field (empty) #unset_field - #path loaded_scripts -#open 2019-07-29-19-05-26 +#open 2019-10-15-01-48-24 #fields name #types string scripts/base/init-bare.zeek @@ -14,6 +14,8 @@ scripts/base/init-bare.zeek build/scripts/base/bif/reporter.bif.zeek build/scripts/base/bif/strings.bif.zeek build/scripts/base/bif/option.bif.zeek + scripts/base/frameworks/supervisor/api.zeek + build/scripts/base/bif/supervisor.bif.zeek build/scripts/base/bif/plugins/Zeek_SNMP.types.bif.zeek build/scripts/base/bif/plugins/Zeek_KRB.types.bif.zeek build/scripts/base/bif/event.bif.zeek @@ -35,6 +37,8 @@ scripts/base/init-frameworks-and-bifs.zeek build/scripts/base/bif/data.bif.zeek build/scripts/base/bif/store.bif.zeek scripts/base/frameworks/broker/log.zeek + scripts/base/frameworks/supervisor/__load__.zeek + 
scripts/base/frameworks/supervisor/main.zeek scripts/base/frameworks/input/__load__.zeek scripts/base/frameworks/input/main.zeek build/scripts/base/bif/input.bif.zeek @@ -181,4 +185,4 @@ scripts/base/init-frameworks-and-bifs.zeek build/scripts/base/bif/plugins/Zeek_SQLiteWriter.sqlite.bif.zeek scripts/policy/misc/loaded-scripts.zeek scripts/base/utils/paths.zeek -#close 2019-07-29-19-05-26 +#close 2019-10-15-01-48-24 diff --git a/testing/btest/Baseline/coverage.default-load-baseline/canonified_loaded_scripts.log b/testing/btest/Baseline/coverage.default-load-baseline/canonified_loaded_scripts.log index 42dbfbd772..a95b7505f3 100644 --- a/testing/btest/Baseline/coverage.default-load-baseline/canonified_loaded_scripts.log +++ b/testing/btest/Baseline/coverage.default-load-baseline/canonified_loaded_scripts.log @@ -3,7 +3,7 @@ #empty_field (empty) #unset_field - #path loaded_scripts -#open 2019-08-06-00-02-39 +#open 2019-10-15-01-48-24 #fields name #types string scripts/base/init-bare.zeek @@ -14,6 +14,8 @@ scripts/base/init-bare.zeek build/scripts/base/bif/reporter.bif.zeek build/scripts/base/bif/strings.bif.zeek build/scripts/base/bif/option.bif.zeek + scripts/base/frameworks/supervisor/api.zeek + build/scripts/base/bif/supervisor.bif.zeek build/scripts/base/bif/plugins/Zeek_SNMP.types.bif.zeek build/scripts/base/bif/plugins/Zeek_KRB.types.bif.zeek build/scripts/base/bif/event.bif.zeek @@ -35,6 +37,8 @@ scripts/base/init-frameworks-and-bifs.zeek build/scripts/base/bif/data.bif.zeek build/scripts/base/bif/store.bif.zeek scripts/base/frameworks/broker/log.zeek + scripts/base/frameworks/supervisor/__load__.zeek + scripts/base/frameworks/supervisor/main.zeek scripts/base/frameworks/input/__load__.zeek scripts/base/frameworks/input/main.zeek build/scripts/base/bif/input.bif.zeek @@ -375,4 +379,4 @@ scripts/base/init-default.zeek scripts/base/misc/find-filtered-trace.zeek scripts/base/misc/version.zeek scripts/policy/misc/loaded-scripts.zeek -#close 2019-08-06-00-02-39 
+#close 2019-10-15-01-48-25 diff --git a/testing/btest/Baseline/plugins.hooks/output b/testing/btest/Baseline/plugins.hooks/output index 94cb9c0d34..de6971d4d5 100644 --- a/testing/btest/Baseline/plugins.hooks/output +++ b/testing/btest/Baseline/plugins.hooks/output @@ -157,6 +157,8 @@ 0.000000 MetaHookPost CallFunction(Analyzer::register_for_ports, , (Analyzer::ANALYZER_TEREDO, {3544/udp})) -> 0.000000 MetaHookPost CallFunction(Analyzer::register_for_ports, , (Analyzer::ANALYZER_VXLAN, {4789/udp})) -> 0.000000 MetaHookPost CallFunction(Analyzer::register_for_ports, , (Analyzer::ANALYZER_XMPP, {5222<...>/tcp})) -> +0.000000 MetaHookPost CallFunction(Broker::__subscribe, , (zeek/supervisor)) -> +0.000000 MetaHookPost CallFunction(Broker::subscribe, , (zeek/supervisor)) -> 0.000000 MetaHookPost CallFunction(Cluster::is_enabled, , ()) -> 0.000000 MetaHookPost CallFunction(Cluster::is_enabled, , ()) -> 0.000000 MetaHookPost CallFunction(Cluster::local_node_type, , ()) -> @@ -274,7 +276,7 @@ 0.000000 MetaHookPost CallFunction(Log::__create_stream, , (Weird::LOG, [columns=Weird::Info, ev=Weird::log_weird, path=weird])) -> 0.000000 MetaHookPost CallFunction(Log::__create_stream, , (X509::LOG, [columns=X509::Info, ev=X509::log_x509, path=x509])) -> 0.000000 MetaHookPost CallFunction(Log::__create_stream, , (mysql::LOG, [columns=MySQL::Info, ev=MySQL::log_mysql, path=mysql])) -> -0.000000 MetaHookPost CallFunction(Log::__write, , (PacketFilter::LOG, [ts=1565053246.404549, node=zeek, filter=ip or not ip, init=T, success=T])) -> +0.000000 MetaHookPost CallFunction(Log::__write, , (PacketFilter::LOG, [ts=1571104127.525167, node=zeek, filter=ip or not ip, init=T, success=T])) -> 0.000000 MetaHookPost CallFunction(Log::add_default_filter, , (Broker::LOG)) -> 0.000000 MetaHookPost CallFunction(Log::add_default_filter, , (Cluster::LOG)) -> 0.000000 MetaHookPost CallFunction(Log::add_default_filter, , (Config::LOG)) -> @@ -455,7 +457,7 @@ 0.000000 MetaHookPost 
CallFunction(Log::create_stream, , (Weird::LOG, [columns=Weird::Info, ev=Weird::log_weird, path=weird])) -> 0.000000 MetaHookPost CallFunction(Log::create_stream, , (X509::LOG, [columns=X509::Info, ev=X509::log_x509, path=x509])) -> 0.000000 MetaHookPost CallFunction(Log::create_stream, , (mysql::LOG, [columns=MySQL::Info, ev=MySQL::log_mysql, path=mysql])) -> -0.000000 MetaHookPost CallFunction(Log::write, , (PacketFilter::LOG, [ts=1565053246.404549, node=zeek, filter=ip or not ip, init=T, success=T])) -> +0.000000 MetaHookPost CallFunction(Log::write, , (PacketFilter::LOG, [ts=1571104127.525167, node=zeek, filter=ip or not ip, init=T, success=T])) -> 0.000000 MetaHookPost CallFunction(NetControl::check_plugins, , ()) -> 0.000000 MetaHookPost CallFunction(NetControl::init, , ()) -> 0.000000 MetaHookPost CallFunction(Notice::want_pp, , ()) -> @@ -693,6 +695,7 @@ 0.000000 MetaHookPost LoadFile(0, .<...>/add-geodata.zeek) -> -1 0.000000 MetaHookPost LoadFile(0, .<...>/addrs.zeek) -> -1 0.000000 MetaHookPost LoadFile(0, .<...>/analyzer.bif.zeek) -> -1 +0.000000 MetaHookPost LoadFile(0, .<...>/api.zeek) -> -1 0.000000 MetaHookPost LoadFile(0, .<...>/ascii.zeek) -> -1 0.000000 MetaHookPost LoadFile(0, .<...>/average.zeek) -> -1 0.000000 MetaHookPost LoadFile(0, .<...>/benchmark.zeek) -> -1 @@ -766,6 +769,7 @@ 0.000000 MetaHookPost LoadFile(0, .<...>/store.zeek) -> -1 0.000000 MetaHookPost LoadFile(0, .<...>/strings.bif.zeek) -> -1 0.000000 MetaHookPost LoadFile(0, .<...>/sum.zeek) -> -1 +0.000000 MetaHookPost LoadFile(0, .<...>/supervisor.bif.zeek) -> -1 0.000000 MetaHookPost LoadFile(0, .<...>/thresholds.zeek) -> -1 0.000000 MetaHookPost LoadFile(0, .<...>/top-k.bif.zeek) -> -1 0.000000 MetaHookPost LoadFile(0, .<...>/topk.zeek) -> -1 @@ -787,6 +791,7 @@ 0.000000 MetaHookPost LoadFile(0, base<...>/addrs.zeek) -> -1 0.000000 MetaHookPost LoadFile(0, base<...>/analyzer) -> -1 0.000000 MetaHookPost LoadFile(0, base<...>/analyzer.bif.zeek) -> -1 +0.000000 MetaHookPost 
LoadFile(0, base<...>/api.zeek) -> -1 0.000000 MetaHookPost LoadFile(0, base<...>/bif) -> -1 0.000000 MetaHookPost LoadFile(0, base<...>/broker) -> -1 0.000000 MetaHookPost LoadFile(0, base<...>/cluster) -> -1 @@ -867,6 +872,8 @@ 0.000000 MetaHookPost LoadFile(0, base<...>/strings.bif.zeek) -> -1 0.000000 MetaHookPost LoadFile(0, base<...>/strings.zeek) -> -1 0.000000 MetaHookPost LoadFile(0, base<...>/sumstats) -> -1 +0.000000 MetaHookPost LoadFile(0, base<...>/supervisor) -> -1 +0.000000 MetaHookPost LoadFile(0, base<...>/supervisor.bif.zeek) -> -1 0.000000 MetaHookPost LoadFile(0, base<...>/syslog) -> -1 0.000000 MetaHookPost LoadFile(0, base<...>/thresholds.zeek) -> -1 0.000000 MetaHookPost LoadFile(0, base<...>/time.zeek) -> -1 @@ -1052,6 +1059,8 @@ 0.000000 MetaHookPre CallFunction(Analyzer::register_for_ports, , (Analyzer::ANALYZER_TEREDO, {3544/udp})) 0.000000 MetaHookPre CallFunction(Analyzer::register_for_ports, , (Analyzer::ANALYZER_VXLAN, {4789/udp})) 0.000000 MetaHookPre CallFunction(Analyzer::register_for_ports, , (Analyzer::ANALYZER_XMPP, {5222<...>/tcp})) +0.000000 MetaHookPre CallFunction(Broker::__subscribe, , (zeek/supervisor)) +0.000000 MetaHookPre CallFunction(Broker::subscribe, , (zeek/supervisor)) 0.000000 MetaHookPre CallFunction(Cluster::is_enabled, , ()) 0.000000 MetaHookPre CallFunction(Cluster::is_enabled, , ()) 0.000000 MetaHookPre CallFunction(Cluster::local_node_type, , ()) @@ -1169,7 +1178,7 @@ 0.000000 MetaHookPre CallFunction(Log::__create_stream, , (Weird::LOG, [columns=Weird::Info, ev=Weird::log_weird, path=weird])) 0.000000 MetaHookPre CallFunction(Log::__create_stream, , (X509::LOG, [columns=X509::Info, ev=X509::log_x509, path=x509])) 0.000000 MetaHookPre CallFunction(Log::__create_stream, , (mysql::LOG, [columns=MySQL::Info, ev=MySQL::log_mysql, path=mysql])) -0.000000 MetaHookPre CallFunction(Log::__write, , (PacketFilter::LOG, [ts=1565053246.404549, node=zeek, filter=ip or not ip, init=T, success=T])) +0.000000 MetaHookPre 
CallFunction(Log::__write, , (PacketFilter::LOG, [ts=1571104127.525167, node=zeek, filter=ip or not ip, init=T, success=T])) 0.000000 MetaHookPre CallFunction(Log::add_default_filter, , (Broker::LOG)) 0.000000 MetaHookPre CallFunction(Log::add_default_filter, , (Cluster::LOG)) 0.000000 MetaHookPre CallFunction(Log::add_default_filter, , (Config::LOG)) @@ -1350,7 +1359,7 @@ 0.000000 MetaHookPre CallFunction(Log::create_stream, , (Weird::LOG, [columns=Weird::Info, ev=Weird::log_weird, path=weird])) 0.000000 MetaHookPre CallFunction(Log::create_stream, , (X509::LOG, [columns=X509::Info, ev=X509::log_x509, path=x509])) 0.000000 MetaHookPre CallFunction(Log::create_stream, , (mysql::LOG, [columns=MySQL::Info, ev=MySQL::log_mysql, path=mysql])) -0.000000 MetaHookPre CallFunction(Log::write, , (PacketFilter::LOG, [ts=1565053246.404549, node=zeek, filter=ip or not ip, init=T, success=T])) +0.000000 MetaHookPre CallFunction(Log::write, , (PacketFilter::LOG, [ts=1571104127.525167, node=zeek, filter=ip or not ip, init=T, success=T])) 0.000000 MetaHookPre CallFunction(NetControl::check_plugins, , ()) 0.000000 MetaHookPre CallFunction(NetControl::init, , ()) 0.000000 MetaHookPre CallFunction(Notice::want_pp, , ()) @@ -1588,6 +1597,7 @@ 0.000000 MetaHookPre LoadFile(0, .<...>/add-geodata.zeek) 0.000000 MetaHookPre LoadFile(0, .<...>/addrs.zeek) 0.000000 MetaHookPre LoadFile(0, .<...>/analyzer.bif.zeek) +0.000000 MetaHookPre LoadFile(0, .<...>/api.zeek) 0.000000 MetaHookPre LoadFile(0, .<...>/ascii.zeek) 0.000000 MetaHookPre LoadFile(0, .<...>/average.zeek) 0.000000 MetaHookPre LoadFile(0, .<...>/benchmark.zeek) @@ -1661,6 +1671,7 @@ 0.000000 MetaHookPre LoadFile(0, .<...>/store.zeek) 0.000000 MetaHookPre LoadFile(0, .<...>/strings.bif.zeek) 0.000000 MetaHookPre LoadFile(0, .<...>/sum.zeek) +0.000000 MetaHookPre LoadFile(0, .<...>/supervisor.bif.zeek) 0.000000 MetaHookPre LoadFile(0, .<...>/thresholds.zeek) 0.000000 MetaHookPre LoadFile(0, .<...>/top-k.bif.zeek) 0.000000 
MetaHookPre LoadFile(0, .<...>/topk.zeek) @@ -1682,6 +1693,7 @@ 0.000000 MetaHookPre LoadFile(0, base<...>/addrs.zeek) 0.000000 MetaHookPre LoadFile(0, base<...>/analyzer) 0.000000 MetaHookPre LoadFile(0, base<...>/analyzer.bif.zeek) +0.000000 MetaHookPre LoadFile(0, base<...>/api.zeek) 0.000000 MetaHookPre LoadFile(0, base<...>/bif) 0.000000 MetaHookPre LoadFile(0, base<...>/broker) 0.000000 MetaHookPre LoadFile(0, base<...>/cluster) @@ -1762,6 +1774,8 @@ 0.000000 MetaHookPre LoadFile(0, base<...>/strings.bif.zeek) 0.000000 MetaHookPre LoadFile(0, base<...>/strings.zeek) 0.000000 MetaHookPre LoadFile(0, base<...>/sumstats) +0.000000 MetaHookPre LoadFile(0, base<...>/supervisor) +0.000000 MetaHookPre LoadFile(0, base<...>/supervisor.bif.zeek) 0.000000 MetaHookPre LoadFile(0, base<...>/syslog) 0.000000 MetaHookPre LoadFile(0, base<...>/thresholds.zeek) 0.000000 MetaHookPre LoadFile(0, base<...>/time.zeek) @@ -1947,6 +1961,8 @@ 0.000000 | HookCallFunction Analyzer::register_for_ports(Analyzer::ANALYZER_TEREDO, {3544/udp}) 0.000000 | HookCallFunction Analyzer::register_for_ports(Analyzer::ANALYZER_VXLAN, {4789/udp}) 0.000000 | HookCallFunction Analyzer::register_for_ports(Analyzer::ANALYZER_XMPP, {5222<...>/tcp}) +0.000000 | HookCallFunction Broker::__subscribe(zeek/supervisor) +0.000000 | HookCallFunction Broker::subscribe(zeek/supervisor) 0.000000 | HookCallFunction Cluster::is_enabled() 0.000000 | HookCallFunction Cluster::local_node_type() 0.000000 | HookCallFunction Cluster::register_pool([topic=zeek<...>/logger, node_type=Cluster::LOGGER, max_nodes=, exclusive=F]) @@ -2063,7 +2079,7 @@ 0.000000 | HookCallFunction Log::__create_stream(Weird::LOG, [columns=Weird::Info, ev=Weird::log_weird, path=weird]) 0.000000 | HookCallFunction Log::__create_stream(X509::LOG, [columns=X509::Info, ev=X509::log_x509, path=x509]) 0.000000 | HookCallFunction Log::__create_stream(mysql::LOG, [columns=MySQL::Info, ev=MySQL::log_mysql, path=mysql]) -0.000000 | HookCallFunction 
Log::__write(PacketFilter::LOG, [ts=1565053246.404549, node=zeek, filter=ip or not ip, init=T, success=T]) +0.000000 | HookCallFunction Log::__write(PacketFilter::LOG, [ts=1571104127.525167, node=zeek, filter=ip or not ip, init=T, success=T]) 0.000000 | HookCallFunction Log::add_default_filter(Broker::LOG) 0.000000 | HookCallFunction Log::add_default_filter(Cluster::LOG) 0.000000 | HookCallFunction Log::add_default_filter(Config::LOG) @@ -2244,7 +2260,7 @@ 0.000000 | HookCallFunction Log::create_stream(Weird::LOG, [columns=Weird::Info, ev=Weird::log_weird, path=weird]) 0.000000 | HookCallFunction Log::create_stream(X509::LOG, [columns=X509::Info, ev=X509::log_x509, path=x509]) 0.000000 | HookCallFunction Log::create_stream(mysql::LOG, [columns=MySQL::Info, ev=MySQL::log_mysql, path=mysql]) -0.000000 | HookCallFunction Log::write(PacketFilter::LOG, [ts=1565053246.404549, node=zeek, filter=ip or not ip, init=T, success=T]) +0.000000 | HookCallFunction Log::write(PacketFilter::LOG, [ts=1571104127.525167, node=zeek, filter=ip or not ip, init=T, success=T]) 0.000000 | HookCallFunction NetControl::check_plugins() 0.000000 | HookCallFunction NetControl::init() 0.000000 | HookCallFunction Notice::want_pp() @@ -2482,6 +2498,7 @@ 0.000000 | HookLoadFile .<...>/add-geodata.zeek 0.000000 | HookLoadFile .<...>/addrs.zeek 0.000000 | HookLoadFile .<...>/analyzer.bif.zeek +0.000000 | HookLoadFile .<...>/api.zeek 0.000000 | HookLoadFile .<...>/archive.sig 0.000000 | HookLoadFile .<...>/ascii.zeek 0.000000 | HookLoadFile .<...>/audio.sig @@ -2563,6 +2580,7 @@ 0.000000 | HookLoadFile .<...>/store.zeek 0.000000 | HookLoadFile .<...>/strings.bif.zeek 0.000000 | HookLoadFile .<...>/sum.zeek +0.000000 | HookLoadFile .<...>/supervisor.bif.zeek 0.000000 | HookLoadFile .<...>/thresholds.zeek 0.000000 | HookLoadFile .<...>/top-k.bif.zeek 0.000000 | HookLoadFile .<...>/topk.zeek @@ -2585,6 +2603,7 @@ 0.000000 | HookLoadFile base<...>/addrs.zeek 0.000000 | HookLoadFile base<...>/analyzer 
0.000000 | HookLoadFile base<...>/analyzer.bif.zeek +0.000000 | HookLoadFile base<...>/api.zeek 0.000000 | HookLoadFile base<...>/bif 0.000000 | HookLoadFile base<...>/broker 0.000000 | HookLoadFile base<...>/cluster @@ -2665,6 +2684,8 @@ 0.000000 | HookLoadFile base<...>/strings.bif.zeek 0.000000 | HookLoadFile base<...>/strings.zeek 0.000000 | HookLoadFile base<...>/sumstats +0.000000 | HookLoadFile base<...>/supervisor +0.000000 | HookLoadFile base<...>/supervisor.bif.zeek 0.000000 | HookLoadFile base<...>/syslog 0.000000 | HookLoadFile base<...>/thresholds.zeek 0.000000 | HookLoadFile base<...>/time.zeek @@ -2678,7 +2699,7 @@ 0.000000 | HookLoadFile base<...>/xmpp 0.000000 | HookLoadFile base<...>/zeek.bif.zeek 0.000000 | HookLogInit packet_filter 1/1 {ts (time), node (string), filter (string), init (bool), success (bool)} -0.000000 | HookLogWrite packet_filter [ts=1565053246.404549, node=zeek, filter=ip or not ip, init=T, success=T] +0.000000 | HookLogWrite packet_filter [ts=1571104127.525167, node=zeek, filter=ip or not ip, init=T, success=T] 0.000000 | HookQueueEvent NetControl::init() 0.000000 | HookQueueEvent filter_change_tracking() 0.000000 | HookQueueEvent zeek_init() From 7c08488dfceac3d8a17101be9f449f5194a8a3e8 Mon Sep 17 00:00:00 2001 From: Jon Siwek Date: Wed, 16 Oct 2019 15:12:02 -0700 Subject: [PATCH 04/76] Add skeleton logic for handling supervisor control messages --- scripts/base/frameworks/supervisor/api.zeek | 19 +-- scripts/base/frameworks/supervisor/main.zeek | 8 +- src/Supervisor.cc | 157 +++++++++++++++++-- src/Supervisor.h | 14 +- src/supervisor.bif | 6 +- 5 files changed, 170 insertions(+), 34 deletions(-) diff --git a/scripts/base/frameworks/supervisor/api.zeek b/scripts/base/frameworks/supervisor/api.zeek index 31480bda51..25b1f274d5 100644 --- a/scripts/base/frameworks/supervisor/api.zeek +++ b/scripts/base/frameworks/supervisor/api.zeek @@ -4,18 +4,19 @@ module Supervisor; export { - type Status: record { - # TODO: add proper status
fields - n: count; - }; - - type NodeConfig: record { - # TODO: add proper config field + type Node: record { + # TODO: add proper config fields name: string; }; + type Status: record { + # TODO: add proper status fields + n: count; + nodes: table[string] of Node; + }; + global status: function(nodes: string &default="all"): Status; - global create: function(config: NodeConfig): string; + global create: function(node: Node): string; global destroy: function(nodes: string): bool; global restart: function(nodes: string &default="all"): bool; @@ -24,7 +25,7 @@ export { global Supervisor::status_request: event(id: count, nodes: string); global Supervisor::status_response: event(id: count, result: Status); - global Supervisor::create_request: event(id: count, config: NodeConfig); + global Supervisor::create_request: event(id: count, node: Node); global Supervisor::create_response: event(id: count, result: string); global Supervisor::destroy_request: event(id: count, nodes: string); diff --git a/scripts/base/frameworks/supervisor/main.zeek b/scripts/base/frameworks/supervisor/main.zeek index 3dc0651003..ee5abe0818 100644 --- a/scripts/base/frameworks/supervisor/main.zeek +++ b/scripts/base/frameworks/supervisor/main.zeek @@ -28,9 +28,9 @@ event Supervisor::status_request(id: count, nodes: string) Broker::publish(topic, Supervisor::status_response, id, res); } -event Supervisor::create_request(id: count, config: NodeConfig) +event Supervisor::create_request(id: count, node: Node) { - local res = Supervisor::create(config); + local res = Supervisor::create(node); local topic = Supervisor::topic_prefix + "/create_response"; Broker::publish(topic, Supervisor::create_response, id, res); } @@ -54,9 +54,9 @@ function Supervisor::status(nodes: string): Status return Supervisor::__status(nodes); } -function create(config: NodeConfig): string +function create(node: Node): string { - return Supervisor::__create(config); + return Supervisor::__create(node); } function destroy(nodes: 
string): bool diff --git a/src/Supervisor.cc b/src/Supervisor.cc index d6b7620abd..3493ac301f 100644 --- a/src/Supervisor.cc +++ b/src/Supervisor.cc @@ -4,6 +4,7 @@ #include #include #include +#include #include "Supervisor.h" #include "Reporter.h" @@ -201,43 +202,171 @@ void zeek::Supervisor::RunStem(std::unique_ptr pipe) fprintf(stderr, "failed to set stem process group: %s\n", strerror(errno)); + std::string msg_buffer; + std::map nodes; + + auto extract_messages = [](std::string* buf) -> std::vector + { + std::vector rval; + + for ( ; ; ) + { + auto msg_end = buf->find('\0'); + + if ( msg_end == std::string::npos ) + // Don't have a full message yet + break; + + auto msg = buf->substr(0, msg_end); + rval.emplace_back(std::move(msg)); + buf->erase(0, msg_end + 1); + } + + return rval; + }; + for ( ; ; ) { - // TODO: make a proper I/O loop w/ message processing via pipe - // TODO: better way to detect loss of parent than polling + // TODO: better way to detect loss of parent than polling ? + + pollfd fds = { pipe->ReadFD(), POLLIN, 0 }; + constexpr auto poll_timeout_ms = 1000; + auto res = poll(&fds, 1, poll_timeout_ms); + + if ( res < 0 ) + { + fprintf(stderr, "poll() failed: %s\n", strerror(errno)); + continue; + } if ( getppid() == 1 ) exit(0); - sleep(5); - printf("Stem wakeup\n"); - write(pipe->WriteFD(), "hi", 2); + if ( res == 0 ) + continue; + + char buf[256]; + int bytes_read = read(pipe->ReadFD(), buf, 256); + + if ( bytes_read == 0 ) + // EOF + exit(0); + + if ( bytes_read < 0 ) + { + fprintf(stderr, "read() failed: %s\n", strerror(errno)); + continue; + } + + msg_buffer.append(buf, bytes_read); + auto msgs = extract_messages(&msg_buffer); + + for ( auto& msg : msgs ) + { + // TODO: improve message format ... 
+ std::vector msg_tokens; + tokenize_string(std::move(msg), " ", &msg_tokens); + const auto& cmd = msg_tokens[0]; + const auto& node_name = msg_tokens[1]; + + if ( cmd == "create" ) + { + auto res = nodes.emplace(node_name, Node{node_name}); + assert(res.second); + // TODO: fork + printf("Stem creating node: %s\n", node_name.data()); + } + else if ( cmd == "destroy" ) + { + auto res = nodes.erase(node_name); + assert(res > 0 ); + printf("Stem destroying node: %s\n", node_name.data()); + // TODO: kill + } + else if ( cmd == "restart" ) + { + auto it = nodes.find(node_name); + assert(it != nodes.end()); + printf("Stem restarting node: %s\n", node_name.data()); + // TODO: re-use logic for destroy then create + } + else + fprintf(stderr, "unknown supervisor message: %s", cmd.data()); + } } } -RecordVal* zeek::Supervisor::Status(const std::string& nodes) +static zeek::Supervisor::Node node_val_to_struct(const RecordVal* node) { + zeek::Supervisor::Node rval; + rval.name = node->Lookup("name")->AsString()->CheckString(); + return rval; + } + +static RecordVal* node_struct_to_val(const zeek::Supervisor::Node& node) + { + auto rval = new RecordVal(BifType::Record::Supervisor::Node); + rval->Assign(0, new StringVal(node.name)); + return rval; + } + +RecordVal* zeek::Supervisor::Status(const std::string& node_name) + { + // TODO: handle node classes // TODO: return real status information static auto count = 0; auto rval = new RecordVal(BifType::Record::Supervisor::Status); rval->Assign(0, val_mgr->GetCount(count++)); + + auto tt = BifType::Record::Supervisor::Status->FieldType("nodes"); + auto node_table_val = new TableVal(tt->AsTableType()); + rval->Assign(1, node_table_val); + + for ( const auto& n : nodes ) + { + const auto& node = n.second; + auto key = new StringVal(node.name); + auto val = node_struct_to_val(node); + node_table_val->Assign(key, val); + Unref(key); + } + return rval; } -std::string zeek::Supervisor::Create(const RecordVal* node_config) +std::string 
zeek::Supervisor::Create(const RecordVal* node_val) { - // TODO: return error msg on fail, or empty on success + auto node = node_val_to_struct(node_val); + + if ( nodes.find(node.name) != nodes.end() ) + return fmt("node with name '%s' already exists", node.name.data()); + + std::string msg = fmt("create %s", node.name.data()); + safe_write(stem_pipe->WriteFD(), msg.data(), msg.size() + 1); + nodes.emplace(node.name, node); return ""; } -bool zeek::Supervisor::Destroy(const std::string& nodes) +bool zeek::Supervisor::Destroy(const std::string& node_name) { - // TODO: return true if a matching node exists - return false; + // TODO: handle node classes + + if ( ! nodes.erase(node_name) ) + return false; + + std::string msg = fmt("destroy %s", node_name.data()); + safe_write(stem_pipe->WriteFD(), msg.data(), msg.size() + 1); + return true; } -bool zeek::Supervisor::Restart(const std::string& nodes) +bool zeek::Supervisor::Restart(const std::string& node_name) { - // TODO: return true if a matching node exists - return false; + // TODO: handle node classes + + if ( nodes.find(node_name) == nodes.end() ) + return false; + + std::string msg = fmt("restart %s", node_name.data()); + safe_write(stem_pipe->WriteFD(), msg.data(), msg.size() + 1); + return true; } diff --git a/src/Supervisor.h b/src/Supervisor.h index 0713b1f5cd..6ab2d15564 100644 --- a/src/Supervisor.h +++ b/src/Supervisor.h @@ -6,6 +6,7 @@ #include #include #include +#include #include "iosource/IOSource.h" #include "Pipe.h" @@ -24,6 +25,10 @@ public: std::string zeek_exe_path; }; + struct Node { + std::string name; + }; + Supervisor(Config cfg, std::unique_ptr stem_pipe, pid_t stem_pid); ~Supervisor(); @@ -33,10 +38,10 @@ public: void ObserveChildSignal(); - RecordVal* Status(const std::string& nodes); - std::string Create(const RecordVal* node_config); - bool Destroy(const std::string& nodes); - bool Restart(const std::string& nodes); + RecordVal* Status(const std::string& node_name); + std::string 
Create(const RecordVal* node); + bool Destroy(const std::string& node_name); + bool Restart(const std::string& node_name); private: @@ -57,6 +62,7 @@ private: pid_t stem_pid; std::unique_ptr stem_pipe; bro::Flare signal_flare; + std::map nodes; }; extern Supervisor* supervisor; diff --git a/src/supervisor.bif b/src/supervisor.bif index cac895e774..a6188d4687 100644 --- a/src/supervisor.bif +++ b/src/supervisor.bif @@ -7,16 +7,16 @@ module Supervisor; type Supervisor::Status: record; -type Supervisor::NodeConfig: record; +type Supervisor::Node: record; function Supervisor::__status%(nodes: string%): Supervisor::Status %{ return zeek::supervisor->Status(nodes->CheckString()); %} -function Supervisor::__create%(config: Supervisor::NodeConfig%): string +function Supervisor::__create%(node: Supervisor::Node%): string %{ - auto rval = zeek::supervisor->Create(config->AsRecordVal()); + auto rval = zeek::supervisor->Create(node->AsRecordVal()); return new StringVal(rval); %} From 573e127672faed74390579eb2af9c6762ec1d883 Mon Sep 17 00:00:00 2001 From: Jon Siwek Date: Wed, 16 Oct 2019 20:10:25 -0700 Subject: [PATCH 05/76] Add supervisor node creation --- src/Pipe.cc | 6 ++++ src/Pipe.h | 30 ++++++++++++++++++++ src/Supervisor.cc | 71 +++++++++++++++++++++++++++++++++-------------- src/Supervisor.h | 7 +++-- src/main.cc | 36 ++++++++++++++++-------- 5 files changed, 115 insertions(+), 35 deletions(-) diff --git a/src/Pipe.cc b/src/Pipe.cc index 97ccd7f3c5..9957e25dce 100644 --- a/src/Pipe.cc +++ b/src/Pipe.cc @@ -141,3 +141,9 @@ Pipe& Pipe::operator=(const Pipe& other) status_flags[1] = other.status_flags[1]; return *this; } + +PipePair::PipePair(int flags, int status_flags, int* fds) + : pipes{Pipe(flags, flags, status_flags, status_flags, fds ? fds + 0 : nullptr), + Pipe(flags, flags, status_flags, status_flags, fds ? 
fds + 2 : nullptr)} + { + } diff --git a/src/Pipe.h b/src/Pipe.h index ad09d58b3f..18786c75ad 100644 --- a/src/Pipe.h +++ b/src/Pipe.h @@ -66,4 +66,34 @@ private: int status_flags[2]; }; +class PipePair { +public: + + PipePair(int flags, int status_flags, int* fds = nullptr); + + Pipe& In() + { return pipes[swapped]; } + + Pipe& Out() + { return pipes[!swapped]; } + + const Pipe& In() const + { return pipes[swapped]; } + + const Pipe& Out() const + { return pipes[!swapped]; } + + int InFD() const + { return In().ReadFD(); } + + int OutFD() const + { return Out().WriteFD(); } + + void Swap() + { swapped = ! swapped; } + + Pipe pipes[2]; + bool swapped = false; +}; + } // namespace bro diff --git a/src/Supervisor.cc b/src/Supervisor.cc index 3493ac301f..030b28b256 100644 --- a/src/Supervisor.cc +++ b/src/Supervisor.cc @@ -26,7 +26,7 @@ static RETSIGTYPE supervisor_sig_handler(int signo) } zeek::Supervisor::Supervisor(zeek::Supervisor::Config cfg, - std::unique_ptr pipe, + std::unique_ptr pipe, pid_t arg_stem_pid) : config(std::move(cfg)), stem_pid(arg_stem_pid), stem_pipe(std::move(pipe)) { @@ -128,16 +128,18 @@ void zeek::Supervisor::HandleChildSignal() bro_strerror_r(errno, tmp, sizeof(tmp)); reporter->Error("failed to fork Zeek supervisor stem process: %s\n", tmp); signal_flare.Fire(); - // Sleep to avoid spining too fast in a revival-fail loop. + // Sleep to avoid spinning too fast in a revival-fail loop. 
sleep(1); } else if ( stem_pid == 0 ) { char stem_env[256]; - safe_snprintf(stem_env, sizeof(stem_env), "ZEEK_STEM=%d,%d", - stem_pipe->ReadFD(), stem_pipe->WriteFD()); + safe_snprintf(stem_env, sizeof(stem_env), "ZEEK_STEM=%d,%d,%d,%d", + stem_pipe->In().ReadFD(), stem_pipe->In().WriteFD(), + stem_pipe->Out().ReadFD(), stem_pipe->Out().WriteFD()); char* env[] = { stem_env, (char*)0 }; - stem_pipe->UnsetFlags(FD_CLOEXEC); + stem_pipe->In().UnsetFlags(FD_CLOEXEC); + stem_pipe->Out().UnsetFlags(FD_CLOEXEC); auto res = execle(config.zeek_exe_path.data(), config.zeek_exe_path.data(), (char*)0, env); @@ -150,6 +152,16 @@ void zeek::Supervisor::HandleChildSignal() else { DBG_LOG(DBG_SUPERVISOR, "stem process revived, new pid: %d", stem_pid); + // Recreate the desired process hierarchy. + + // TODO: probably a preferred order in which to create nodes + // e.g. logger, manager, proxy, worker + for ( const auto& n : nodes ) + { + const auto& node = n.second; + std::string msg = fmt("create %s", node.name.data()); + safe_write(stem_pipe->OutFD(), msg.data(), msg.size() + 1); + } } } @@ -161,14 +173,11 @@ void zeek::Supervisor::GetFds(iosource::FD_Set* read, iosource::FD_Set* write, iosource::FD_Set* except) { read->Insert(signal_flare.FD()); - read->Insert(stem_pipe->ReadFD()); + read->Insert(stem_pipe->InFD()); } double zeek::Supervisor::NextTimestamp(double* local_network_time) { - // We're only asked for a timestamp if either (1) a FD was ready - // or (2) we're not idle (and we go idle if when Process is no-op), - // so there's no case where returning -1 to signify a skip will help. 
return timer_mgr->Time(); } @@ -177,7 +186,7 @@ void zeek::Supervisor::Process() HandleChildSignal(); char buf[256]; - int bytes_read = read(stem_pipe->ReadFD(), buf, 256); + int bytes_read = read(stem_pipe->InFD(), buf, 256); if ( bytes_read > 0 ) { @@ -185,9 +194,10 @@ void zeek::Supervisor::Process() } } -void zeek::Supervisor::RunStem(std::unique_ptr pipe) +std::string zeek::Supervisor::RunStem(std::unique_ptr pipe) { - zeek::set_thread_name("zeek-stem"); + zeek::set_thread_name("zeek.stem"); + pipe->Swap(); // TODO: changing the process group here so that SIGINT to the // supervisor doesn't also get passed to the children. i.e. supervisor // should be in charge of initiating orderly shutdown. But calling @@ -229,7 +239,7 @@ void zeek::Supervisor::RunStem(std::unique_ptr pipe) { // TODO: better way to detect loss of parent than polling ? - pollfd fds = { pipe->ReadFD(), POLLIN, 0 }; + pollfd fds = { pipe->InFD(), POLLIN, 0 }; constexpr auto poll_timeout_ms = 1000; auto res = poll(&fds, 1, poll_timeout_ms); @@ -242,11 +252,15 @@ void zeek::Supervisor::RunStem(std::unique_ptr pipe) if ( getppid() == 1 ) exit(0); + // TODO: periodically send node status updates back to supervisor? + // e.g. can fill in information gaps in the supervisor's node map + // for things such as node PIDs. 
+ if ( res == 0 ) continue; char buf[256]; - int bytes_read = read(pipe->ReadFD(), buf, 256); + int bytes_read = read(pipe->InFD(), buf, 256); if ( bytes_read == 0 ) // EOF @@ -271,10 +285,25 @@ void zeek::Supervisor::RunStem(std::unique_ptr pipe) if ( cmd == "create" ) { - auto res = nodes.emplace(node_name, Node{node_name}); - assert(res.second); - // TODO: fork - printf("Stem creating node: %s\n", node_name.data()); + assert(nodes.find(node_name) == nodes.end()); + auto node_pid = fork(); + + if ( node_pid == -1 ) + fprintf(stderr, "failed to fork Zeek node '%s': %s\n", + node_name.data(), strerror(errno)); + else if ( node_pid == 0 ) + { + // TODO: probably want to return the configuration the + // new node ought to use + zeek::set_thread_name(fmt("zeek.%s", node_name.data())); + return node_name; + } + + Node node; + node.name = node_name; + node.pid = node_pid; + nodes.emplace(node_name, node); + printf("Stem created node: %s %d\n", node_name.data(), node_pid); } else if ( cmd == "destroy" ) { @@ -342,7 +371,7 @@ std::string zeek::Supervisor::Create(const RecordVal* node_val) return fmt("node with name '%s' already exists", node.name.data()); std::string msg = fmt("create %s", node.name.data()); - safe_write(stem_pipe->WriteFD(), msg.data(), msg.size() + 1); + safe_write(stem_pipe->OutFD(), msg.data(), msg.size() + 1); nodes.emplace(node.name, node); return ""; } @@ -355,7 +384,7 @@ bool zeek::Supervisor::Destroy(const std::string& node_name) return false; std::string msg = fmt("destroy %s", node_name.data()); - safe_write(stem_pipe->WriteFD(), msg.data(), msg.size() + 1); + safe_write(stem_pipe->OutFD(), msg.data(), msg.size() + 1); return true; } @@ -367,6 +396,6 @@ bool zeek::Supervisor::Restart(const std::string& node_name) return false; std::string msg = fmt("restart %s", node_name.data()); - safe_write(stem_pipe->WriteFD(), msg.data(), msg.size() + 1); + safe_write(stem_pipe->OutFD(), msg.data(), msg.size() + 1); return true; } diff --git 
a/src/Supervisor.h b/src/Supervisor.h index 6ab2d15564..2bca83f4de 100644 --- a/src/Supervisor.h +++ b/src/Supervisor.h @@ -17,7 +17,7 @@ namespace zeek { class Supervisor : public iosource::IOSource { public: - static void RunStem(std::unique_ptr pipe); + static std::string RunStem(std::unique_ptr pipe); struct Config { int num_workers = 1; @@ -27,9 +27,10 @@ public: struct Node { std::string name; + pid_t pid = 0; }; - Supervisor(Config cfg, std::unique_ptr stem_pipe, pid_t stem_pid); + Supervisor(Config cfg, std::unique_ptr stem_pipe, pid_t stem_pid); ~Supervisor(); @@ -60,7 +61,7 @@ private: Config config; pid_t stem_pid; - std::unique_ptr stem_pipe; + std::unique_ptr stem_pipe; bro::Flare signal_flare; std::map nodes; }; diff --git a/src/main.cc b/src/main.cc index aeae9230fe..08618a719b 100644 --- a/src/main.cc +++ b/src/main.cc @@ -740,12 +740,12 @@ int main(int argc, char** argv) bool use_supervisor = options.supervised_workers > 0; pid_t stem_pid = 0; - std::unique_ptr supervisor_pipe; + std::unique_ptr supervisor_pipe; + std::string stem_spawn = ""; if ( use_supervisor ) { - supervisor_pipe.reset(new bro::Pipe{FD_CLOEXEC, FD_CLOEXEC, - O_NONBLOCK, O_NONBLOCK}); + supervisor_pipe.reset(new bro::PipePair{FD_CLOEXEC, O_NONBLOCK}); stem_pid = fork(); if ( stem_pid == -1 ) @@ -756,7 +756,7 @@ int main(int argc, char** argv) } if ( stem_pid == 0 ) - zeek::Supervisor::RunStem(std::move(supervisor_pipe)); + stem_spawn = zeek::Supervisor::RunStem(std::move(supervisor_pipe)); } auto zeek_stem_env = getenv("ZEEK_STEM"); @@ -766,20 +766,34 @@ int main(int argc, char** argv) std::vector fd_strings; tokenize_string(zeek_stem_env, ",", &fd_strings); - if ( fd_strings.size() != 2 ) + if ( fd_strings.size() != 4 ) { fprintf(stderr, "invalid ZEEK_STEM environment variable value: '%s'\n", zeek_stem_env); exit(1); } - int fds[2]; - fds[0] = std::stoi(fd_strings[0]); - fds[1] = std::stoi(fd_strings[1]); + int fds[4]; - supervisor_pipe.reset(new bro::Pipe{FD_CLOEXEC, 
FD_CLOEXEC, - O_NONBLOCK, O_NONBLOCK, fds}); - zeek::Supervisor::RunStem(std::move(supervisor_pipe)); + for ( auto i = 0; i < 4; ++i ) + fds[i] = std::stoi(fd_strings[i]); + + supervisor_pipe.reset(new bro::PipePair{FD_CLOEXEC, O_NONBLOCK, fds}); + stem_spawn = zeek::Supervisor::RunStem(std::move(supervisor_pipe)); + } + + if ( ! stem_spawn.empty() ) + { + for ( ; ; ) + { + // TODO: this no-op loop is here just to test the process hierarchy + printf("node wakeup: %s\n", stem_spawn.data()); + sleep(2); + + // TODO: this re-parenting check needs to go somewhere proper + if ( getppid() == 1 ) + exit(0); + } } std::set_new_handler(bro_new_handler); From 0d0fe4d1cc9c4fa4ee787031bae218d89be34669 Mon Sep 17 00:00:00 2001 From: Jon Siwek Date: Fri, 18 Oct 2019 12:09:47 -0700 Subject: [PATCH 06/76] Add auto-revival of supervisor leaf nodes --- src/Supervisor.cc | 270 +++++++++++++++++++++++++++++++++++----------- src/Supervisor.h | 2 + 2 files changed, 212 insertions(+), 60 deletions(-) diff --git a/src/Supervisor.cc b/src/Supervisor.cc index 030b28b256..878a7461c3 100644 --- a/src/Supervisor.cc +++ b/src/Supervisor.cc @@ -18,6 +18,36 @@ extern "C" { #include "setsignal.h" } +struct StemState { + StemState(std::unique_ptr p); + + ~StemState(); + + std::string Run(); + + void Reap(); + + std::string Revive(); + + bool Spawn(zeek::Supervisor::Node* node); + + std::vector ExtractMessages(std::string* buffer) const; + + std::unique_ptr signal_flare; + std::unique_ptr pipe; + std::map nodes; + std::string msg_buffer; +}; + +static StemState* stem_state = nullptr; + +static RETSIGTYPE stem_sig_handler(int signo) + { + printf("Stem received SIGCHLD signal: %d\n", signo); + stem_state->signal_flare->Fire(); + return RETSIGVAL; + } + static RETSIGTYPE supervisor_sig_handler(int signo) { DBG_LOG(DBG_SUPERVISOR, "received SIGCHLD signal: %d", signo); @@ -38,6 +68,8 @@ zeek::Supervisor::Supervisor(zeek::Supervisor::Config cfg, zeek::Supervisor::~Supervisor() { + setsignal(SIGCHLD, 
SIG_DFL); + if ( ! stem_pid ) { DBG_LOG(DBG_SUPERVISOR, "shutdown, stem process already exited"); @@ -101,21 +133,24 @@ void zeek::Supervisor::HandleChildSignal() reporter->Error("Supervisor failed to get exit status" " of stem process: %s", tmp); } - else if ( WIFEXITED(status) ) - { - DBG_LOG(DBG_SUPERVISOR, "stem process exited with status %d", - WEXITSTATUS(status)); - stem_pid = 0; - } - else if ( WIFSIGNALED(status) ) - { - DBG_LOG(DBG_SUPERVISOR, "stem process terminated by signal %d", - WTERMSIG(status)); - stem_pid = 0; - } else - reporter->Error("Supervisor failed to get exit status" - " of stem process for unknown reason"); + { + stem_pid = 0; + + if ( WIFEXITED(status) ) + { + DBG_LOG(DBG_SUPERVISOR, "stem process exited with status %d", + WEXITSTATUS(status)); + } + else if ( WIFSIGNALED(status) ) + { + DBG_LOG(DBG_SUPERVISOR, "stem process terminated by signal %d", + WTERMSIG(status)); + } + else + reporter->Error("Supervisor failed to get exit status" + " of stem process for unknown reason"); + } if ( ! stem_pid ) { @@ -194,10 +229,124 @@ void zeek::Supervisor::Process() } } -std::string zeek::Supervisor::RunStem(std::unique_ptr pipe) +StemState::StemState(std::unique_ptr p) + : signal_flare(new bro::Flare()), pipe(std::move(p)) { zeek::set_thread_name("zeek.stem"); pipe->Swap(); + stem_state = this; + setsignal(SIGCHLD, stem_sig_handler); + } + +StemState::~StemState() + { + setsignal(SIGCHLD, SIG_DFL); + } + +void StemState::Reap() + { + for ( auto& n : nodes ) + { + auto& node = n.second; + + if ( ! node.pid ) + continue; + + int status; + auto res = waitpid(node.pid, &status, WNOHANG); + + if ( res == 0 ) + // It's still alive. 
+ continue; + + if ( res == -1 ) + { + fprintf(stderr, "Stem failed to get node exit status %s (%d): %s\n", + node.name.data(), node.pid, strerror(errno)); + continue; + } + + if ( WIFEXITED(status) ) + { + node.exit_status = WEXITSTATUS(status); + // TODO: may be some cases where the node is intended to exit + printf("node '%s' exited with status %d\n", + node.name.data(), node.exit_status); + } + else if ( WIFSIGNALED(status) ) + { + node.signal_number = WTERMSIG(status); + printf("node '%s' terminated by signal %d\n", + node.name.data(), node.signal_number); + } + else + fprintf(stderr, "Stem failed to get node exit status %s (%d)\n", + node.name.data(), node.pid); + + node.pid = 0; + } + } + +std::string StemState::Revive() + { + for ( auto& n : nodes ) + { + auto& node = n.second; + + if ( node.pid ) + continue; + + if ( Spawn(&node) ) + return node.name; + } + + return ""; + } + +bool StemState::Spawn(zeek::Supervisor::Node* node) + { + auto node_pid = fork(); + + if ( node_pid == -1 ) + { + fprintf(stderr, "failed to fork Zeek node '%s': %s\n", + node->name.data(), strerror(errno)); + return false; + } + + if ( node_pid == 0 ) + { + zeek::set_thread_name(fmt("zeek.%s", node->name.data())); + return true; + } + + node->pid = node_pid; + printf("Stem spawned node: %s (%d)\n", node->name.data(), node->pid); + return false; + } + +std::vector StemState::ExtractMessages(std::string* buffer) const + { + std::vector rval; + + for ( ; ; ) + { + auto msg_end = buffer->find('\0'); + + if ( msg_end == std::string::npos ) + // Don't have any full messages left + break; + + auto msg = buffer->substr(0, msg_end); + rval.emplace_back(std::move(msg)); + buffer->erase(0, msg_end + 1); + } + + return rval; + } + +std::string StemState::Run() + { // TODO: changing the process group here so that SIGINT to the // supervisor doesn't also get passed to the children. i.e. supervisor // should be in charge of initiating orderly shutdown. 
But calling @@ -212,45 +361,34 @@ std::string zeek::Supervisor::RunStem(std::unique_ptr pipe) fprintf(stderr, "failed to set stem process group: %s\n", strerror(errno)); - std::string msg_buffer; - std::map nodes; - - auto extract_messages = [](std::string* buf) -> std::vector - { - std::vector rval; - - for ( ; ; ) - { - auto msg_end = buf->find('\0'); - - if ( msg_end == std::string::npos ) - // Don't have a full message yet - break; - - auto msg = buf->substr(0, msg_end); - rval.emplace_back(std::move(msg)); - buf->erase(0, msg_end + 1); - } - - return rval; - }; - for ( ; ; ) { // TODO: better way to detect loss of parent than polling ? - pollfd fds = { pipe->InFD(), POLLIN, 0 }; + pollfd fds[2] = { { pipe->InFD(), POLLIN, 0 }, + { signal_flare->FD(), POLLIN, 0} }; constexpr auto poll_timeout_ms = 1000; - auto res = poll(&fds, 1, poll_timeout_ms); + auto res = poll(fds, 2, poll_timeout_ms); if ( res < 0 ) { - fprintf(stderr, "poll() failed: %s\n", strerror(errno)); - continue; + if ( errno != EINTR ) + { + fprintf(stderr, "poll() failed: %s\n", strerror(errno)); + continue; + } } if ( getppid() == 1 ) + { + // TODO: kill/wait on children exit(0); + } + + auto new_node_name = Revive(); + + if ( ! new_node_name.empty() ) + return new_node_name; // TODO: periodically send node status updates back to supervisor? // e.g. can fill in information gaps in the supervisor's node map @@ -259,6 +397,18 @@ std::string zeek::Supervisor::RunStem(std::unique_ptr pipe) if ( res == 0 ) continue; + if ( signal_flare->Extinguish() ) + { + Reap(); + auto new_node_name = Revive(); + + if ( ! new_node_name.empty() ) + return new_node_name; + } + + if ( ! 
fds[0].revents ) + continue; + char buf[256]; int bytes_read = read(pipe->InFD(), buf, 256); @@ -273,7 +423,7 @@ std::string zeek::Supervisor::RunStem(std::unique_ptr pipe) } msg_buffer.append(buf, bytes_read); - auto msgs = extract_messages(&msg_buffer); + auto msgs = ExtractMessages(&msg_buffer); for ( auto& msg : msgs ) { @@ -286,24 +436,17 @@ std::string zeek::Supervisor::RunStem(std::unique_ptr pipe) if ( cmd == "create" ) { assert(nodes.find(node_name) == nodes.end()); - auto node_pid = fork(); - - if ( node_pid == -1 ) - fprintf(stderr, "failed to fork Zeek node '%s': %s\n", - node_name.data(), strerror(errno)); - else if ( node_pid == 0 ) - { - // TODO: probably want to return the configuration the - // new node ought to use - zeek::set_thread_name(fmt("zeek.%s", node_name.data())); - return node_name; - } - - Node node; + zeek::Supervisor::Node node; node.name = node_name; - node.pid = node_pid; - nodes.emplace(node_name, node); - printf("Stem created node: %s %d\n", node_name.data(), node_pid); + + if ( Spawn(&node) ) + // TODO: probably want to return the full configuration the + // new node ought to use + return node.name; + + // TODO: get stem printfs going through standard Zeek debug.log + printf("Stem created node: %s (%d)\n", node.name.data(), node.pid); + nodes.emplace(node_name, std::move(node)); } else if ( cmd == "destroy" ) { @@ -323,6 +466,13 @@ std::string zeek::Supervisor::RunStem(std::unique_ptr pipe) fprintf(stderr, "unknown supervisor message: %s", cmd.data()); } } + + } + +std::string zeek::Supervisor::RunStem(std::unique_ptr pipe) + { + StemState ss(std::move(pipe)); + return ss.Run(); } static zeek::Supervisor::Node node_val_to_struct(const RecordVal* node) diff --git a/src/Supervisor.h b/src/Supervisor.h index 2bca83f4de..54c1e45f56 100644 --- a/src/Supervisor.h +++ b/src/Supervisor.h @@ -28,6 +28,8 @@ public: struct Node { std::string name; pid_t pid = 0; + int exit_status = 0; + int signal_number = 0; }; Supervisor(Config cfg, 
std::unique_ptr stem_pipe, pid_t stem_pid); From 0180d47c5a183019d30549ebcb12b85e65688aaf Mon Sep 17 00:00:00 2001 From: Jon Siwek Date: Fri, 18 Oct 2019 13:09:29 -0700 Subject: [PATCH 07/76] Add backoff logic to supervisor node revival --- src/Supervisor.cc | 26 ++++++++++++++++++++++++++ src/Supervisor.h | 4 ++++ 2 files changed, 30 insertions(+) diff --git a/src/Supervisor.cc b/src/Supervisor.cc index 878a7461c3..b996e8bb56 100644 --- a/src/Supervisor.cc +++ b/src/Supervisor.cc @@ -289,12 +289,37 @@ void StemState::Reap() std::string StemState::Revive() { + constexpr auto attempts_before_delay_increase = 3; + constexpr auto delay_increase_factor = 2; + constexpr auto reset_revival_state_after = 30; + for ( auto& n : nodes ) { auto& node = n.second; + auto now = std::chrono::steady_clock::now(); + auto revival_reset = std::chrono::seconds(reset_revival_state_after); + auto time_since_spawn = now - node.spawn_time; if ( node.pid ) + { + if ( time_since_spawn > revival_reset ) + { + node.revival_attempts = 0; + node.revival_delay = 1; + } + continue; + } + + auto delay = std::chrono::seconds(node.revival_delay); + + if ( time_since_spawn < delay ) + continue; + + ++node.revival_attempts; + + if ( node.revival_attempts % attempts_before_delay_increase == 0 ) + node.revival_delay *= delay_increase_factor; if ( Spawn(&node) ) return node.name; @@ -321,6 +346,7 @@ bool StemState::Spawn(zeek::Supervisor::Node* node) } node->pid = node_pid; + node->spawn_time = std::chrono::steady_clock::now(); printf("Stem spawned node: %s (%d)\n", node->name.data(), node->pid); return false; } diff --git a/src/Supervisor.h b/src/Supervisor.h index 54c1e45f56..a9209f5471 100644 --- a/src/Supervisor.h +++ b/src/Supervisor.h @@ -6,6 +6,7 @@ #include #include #include +#include #include #include "iosource/IOSource.h" @@ -30,6 +31,9 @@ public: pid_t pid = 0; int exit_status = 0; int signal_number = 0; + int revival_attempts = 0; + int revival_delay = 1; + std::chrono::time_point spawn_time; 
}; Supervisor(Config cfg, std::unique_ptr stem_pipe, pid_t stem_pid); From 2bc533f762c43d5c4c84b4d6471ff59b8e772895 Mon Sep 17 00:00:00 2001 From: Jon Siwek Date: Fri, 18 Oct 2019 14:10:31 -0700 Subject: [PATCH 08/76] Add shutdown logic for intermediate supervisor stem process To kill/wait on the supervised leaf nodes before exiting. --- src/Supervisor.cc | 102 +++++++++++++++++++++++++++++++++++++++++++--- src/main.cc | 5 ++- 2 files changed, 100 insertions(+), 7 deletions(-) diff --git a/src/Supervisor.cc b/src/Supervisor.cc index b996e8bb56..a6b7d7002a 100644 --- a/src/Supervisor.cc +++ b/src/Supervisor.cc @@ -33,21 +33,39 @@ struct StemState { std::vector ExtractMessages(std::string* buffer) const; + int AliveNodeCount() const; + + void KillNodes(int signal) const; + + void Shutdown(int exit_code); + std::unique_ptr signal_flare; std::unique_ptr pipe; std::map nodes; std::string msg_buffer; + bool shutting_down = false; }; static StemState* stem_state = nullptr; -static RETSIGTYPE stem_sig_handler(int signo) +static RETSIGTYPE stem_sigchld_handler(int signo) { printf("Stem received SIGCHLD signal: %d\n", signo); stem_state->signal_flare->Fire(); return RETSIGVAL; } +static RETSIGTYPE stem_sigterm_handler(int signo) + { + printf("Stem received SIGTERM signal: %d\n", signo); + + if ( ! 
stem_state->shutting_down ) + stem_state->signal_flare->Fire(); + + stem_state->shutting_down = true; + return RETSIGVAL; + } + static RETSIGTYPE supervisor_sig_handler(int signo) { DBG_LOG(DBG_SUPERVISOR, "received SIGCHLD signal: %d", signo); @@ -235,12 +253,14 @@ StemState::StemState(std::unique_ptr p) zeek::set_thread_name("zeek.stem"); pipe->Swap(); stem_state = this; - setsignal(SIGCHLD, stem_sig_handler); + setsignal(SIGCHLD, stem_sigchld_handler); + setsignal(SIGTERM, stem_sigterm_handler); } StemState::~StemState() { setsignal(SIGCHLD, SIG_DFL); + setsignal(SIGTERM, SIG_DFL); } void StemState::Reap() @@ -371,6 +391,74 @@ std::vector StemState::ExtractMessages(std::string* buffer) const return rval; } +int StemState::AliveNodeCount() const + { + auto rval = 0; + + for ( const auto& n : nodes ) + if ( n.second.pid ) + ++rval; + + return rval; + } + +void StemState::KillNodes(int signal) const + { + for ( const auto& n : nodes ) + { + const auto& node = n.second; + auto kill_res = kill(node.pid, signal); + + if ( kill_res == -1 ) + { + char tmp[256]; + bro_strerror_r(errno, tmp, sizeof(tmp)); + fprintf(stderr, "Failed to send signal to node %s: %s", + node.name.data(), tmp); + } + } + } + +void StemState::Shutdown(int exit_code) + { + constexpr auto max_term_attempts = 13; + constexpr auto kill_delay = 2; + + auto kill_attempts = 0; + + for ( ; ; ) + { + auto sig = kill_attempts++ < max_term_attempts ? 
SIGTERM : SIGKILL; + printf("Stem killed nodes with signal %d\n", sig); + KillNodes(sig); + Reap(); + auto nodes_alive = AliveNodeCount(); + + if ( nodes_alive == 0 ) + exit(exit_code); + + printf("Stem nodes still alive %d, sleeping for %d seconds\n", + nodes_alive, kill_delay); + + auto sleep_time_left = kill_delay; + + while ( sleep_time_left > 0 ) + { + sleep_time_left = sleep(sleep_time_left); + + if ( sleep_time_left > 0 ) + { + // Interrupted by signal, so check if children exited + Reap(); + nodes_alive = AliveNodeCount(); + + if ( nodes_alive == 0 ) + exit(exit_code); + } + } + } + } + std::string StemState::Run() { // TODO: changing the process group here so that SIGINT to the @@ -389,8 +477,6 @@ std::string StemState::Run() for ( ; ; ) { - // TODO: better way to detect loss of parent than polling ? - pollfd fds[2] = { { pipe->InFD(), POLLIN, 0 }, { signal_flare->FD(), POLLIN, 0} }; constexpr auto poll_timeout_ms = 1000; @@ -407,8 +493,9 @@ std::string StemState::Run() if ( getppid() == 1 ) { - // TODO: kill/wait on children - exit(0); + // TODO: better way to detect loss of parent than polling ? + printf("Stem suicide\n"); + Shutdown(13); } auto new_node_name = Revive(); @@ -425,6 +512,9 @@ std::string StemState::Run() if ( signal_flare->Extinguish() ) { + if ( shutting_down ) + Shutdown(0); + Reap(); auto new_node_name = Revive(); diff --git a/src/main.cc b/src/main.cc index 08618a719b..abda0611b7 100644 --- a/src/main.cc +++ b/src/main.cc @@ -792,7 +792,10 @@ int main(int argc, char** argv) // TODO: this re-parenting check needs to go somewhere proper if ( getppid() == 1 ) - exit(0); + { + printf("node suicide: %s\n", stem_spawn.data()); + exit(13); + } } } From 773b39e52ed655a90651bed2b482b83f0a26a509 Mon Sep 17 00:00:00 2001 From: Jon Siwek Date: Fri, 18 Oct 2019 17:57:20 -0700 Subject: [PATCH 09/76] Finish implementing supervisor infrastructure The process hierarchy and all supervisor control commands are now working (e.g. 
status, create, destroy, restart), but nodes are not currently spawned with the desired configuration parameters so they don't yet operate as real cluster nodes (e.g. worker, logger, manager, proxy). --- scripts/base/frameworks/supervisor/api.zeek | 4 +- src/Supervisor.cc | 480 ++++++++++++-------- src/Supervisor.h | 1 + 3 files changed, 287 insertions(+), 198 deletions(-) diff --git a/scripts/base/frameworks/supervisor/api.zeek b/scripts/base/frameworks/supervisor/api.zeek index 25b1f274d5..9ebc9b7fa8 100644 --- a/scripts/base/frameworks/supervisor/api.zeek +++ b/scripts/base/frameworks/supervisor/api.zeek @@ -7,11 +7,11 @@ export { type Node: record { # TODO: add proper config fields name: string; + pid: count &optional; }; type Status: record { - # TODO: add proper status fields - n: count; + # TODO: add more status fields ? nodes: table[string] of Node; }; diff --git a/src/Supervisor.cc b/src/Supervisor.cc index a6b7d7002a..008d1a309e 100644 --- a/src/Supervisor.cc +++ b/src/Supervisor.cc @@ -18,35 +18,45 @@ extern "C" { #include "setsignal.h" } -struct StemState { - StemState(std::unique_ptr p); +namespace { +struct Stem { + Stem(std::unique_ptr p); - ~StemState(); + ~Stem(); std::string Run(); + std::string Poll(); + void Reap(); std::string Revive(); bool Spawn(zeek::Supervisor::Node* node); - std::vector ExtractMessages(std::string* buffer) const; - int AliveNodeCount() const; void KillNodes(int signal) const; + void KillNode(const zeek::Supervisor::Node& node, int signal) const; + + void Destroy(zeek::Supervisor::Node* node) const; + + bool Wait(zeek::Supervisor::Node* node, int options) const; + void Shutdown(int exit_code); + void ReportStatus(const zeek::Supervisor::Node& node) const; + std::unique_ptr signal_flare; std::unique_ptr pipe; std::map nodes; std::string msg_buffer; bool shutting_down = false; }; +} -static StemState* stem_state = nullptr; +static Stem* stem_state = nullptr; static RETSIGTYPE stem_sigchld_handler(int signo) { @@ -73,6 +83,26 
@@ static RETSIGTYPE supervisor_sig_handler(int signo) return RETSIGVAL; } +static std::vector extract_messages(std::string* buffer) + { + std::vector rval; + + for ( ; ; ) + { + auto msg_end = buffer->find('\0'); + + if ( msg_end == std::string::npos ) + // Don't have any full messages left + break; + + auto msg = buffer->substr(0, msg_end); + rval.emplace_back(std::move(msg)); + buffer->erase(0, msg_end + 1); + } + + return rval; + } + zeek::Supervisor::Supervisor(zeek::Supervisor::Config cfg, std::unique_ptr pipe, pid_t arg_stem_pid) @@ -242,12 +272,31 @@ void zeek::Supervisor::Process() int bytes_read = read(stem_pipe->InFD(), buf, 256); if ( bytes_read > 0 ) + msg_buffer.append(buf, bytes_read); + + auto msgs = extract_messages(&msg_buffer); + + for ( auto& msg : msgs ) { - DBG_LOG(DBG_SUPERVISOR, "read msg from Stem: %.*s", bytes_read, buf); + DBG_LOG(DBG_SUPERVISOR, "read msg from Stem: %s", msg.data()); + std::vector msg_tokens; + tokenize_string(msg, " ", &msg_tokens); + const auto& type = msg_tokens[0]; + + if ( type == "status" ) + { + const auto& name = msg_tokens[1]; + auto it = nodes.find(name); + + if ( it != nodes.end() ) + it->second.pid = std::stoi(msg_tokens[2]); + } + else + reporter->Error("Supervisor got unknown msg: %s", msg.data()); } } -StemState::StemState(std::unique_ptr p) +Stem::Stem(std::unique_ptr p) : signal_flare(new bro::Flare()), pipe(std::move(p)) { zeek::set_thread_name("zeek.stem"); @@ -255,15 +304,30 @@ StemState::StemState(std::unique_ptr p) stem_state = this; setsignal(SIGCHLD, stem_sigchld_handler); setsignal(SIGTERM, stem_sigterm_handler); + + // TODO: changing the process group here so that SIGINT to the + // supervisor doesn't also get passed to the children. i.e. supervisor + // should be in charge of initiating orderly shutdown. 
But calling + // just setpgid() like this is technically a race-condition -- need + // to do more work of blocking SIGINT before fork(), unblocking after, + // then also calling setpgid() from parent. And just not doing that + // until more is known whether that's the right SIGINT behavior in + // the first place. + auto res = setpgid(0, 0); + + if ( res == -1 ) + fprintf(stderr, "failed to set stem process group: %s\n", + strerror(errno)); + } -StemState::~StemState() +Stem::~Stem() { setsignal(SIGCHLD, SIG_DFL); setsignal(SIGTERM, SIG_DFL); } -void StemState::Reap() +void Stem::Reap() { for ( auto& n : nodes ) { @@ -272,42 +336,78 @@ void StemState::Reap() if ( ! node.pid ) continue; - int status; - auto res = waitpid(node.pid, &status, WNOHANG); - - if ( res == 0 ) - // It's still alive. - continue; - - if ( res == -1 ) - { - fprintf(stderr, "Stem failed to get node exit status %s (%d): %s\n", - node.name.data(), node.pid, strerror(errno)); - continue; - } - - if ( WIFEXITED(status) ) - { - node.exit_status = WEXITSTATUS(status); - // TODO: may be some cases where the node is intended to exit - printf("node '%s' exited with status %d\n", - node.name.data(), node.exit_status); - } - else if ( WIFSIGNALED(status) ) - { - node.signal_number = WTERMSIG(status); - printf("node '%s' terminated by signal %d\n", - node.name.data(), node.signal_number); - } - else - fprintf(stderr, "Stem failed to get node exit status %s (%d)\n", - node.name.data(), node.pid); - - node.pid = 0; + Wait(&node, WNOHANG); } } -std::string StemState::Revive() +bool Stem::Wait(zeek::Supervisor::Node* node, int options) const + { + int status; + auto res = waitpid(node->pid, &status, options); + + if ( res == 0 ) + // It's still alive. 
+ return false; + + if ( res == -1 ) + { + fprintf(stderr, "Stem failed to get node exit status %s (%d): %s\n", + node->name.data(), node->pid, strerror(errno)); + return false; + } + + if ( WIFEXITED(status) ) + { + node->exit_status = WEXITSTATUS(status); + // TODO: may be some cases where the node is intended to exit + printf("node '%s' exited with status %d\n", + node->name.data(), node->exit_status); + } + else if ( WIFSIGNALED(status) ) + { + node->signal_number = WTERMSIG(status); + printf("node '%s' terminated by signal %d\n", + node->name.data(), node->signal_number); + } + else + fprintf(stderr, "Stem failed to get node exit status %s (%d)\n", + node->name.data(), node->pid); + + node->pid = 0; + return true; + } + +void Stem::KillNode(const zeek::Supervisor::Node& node, int signal) const + { + auto kill_res = kill(node.pid, signal); + + if ( kill_res == -1 ) + fprintf(stderr, "Failed to send signal to node %s: %s", + node.name.data(), strerror(errno)); + } + +void Stem::Destroy(zeek::Supervisor::Node* node) const + { + constexpr auto max_term_attempts = 13; + constexpr auto kill_delay = 2; + auto kill_attempts = 0; + + for ( ; ; ) + { + auto sig = kill_attempts++ < max_term_attempts ? 
SIGTERM : SIGKILL; + KillNode(*node, sig); + usleep(10); + + if ( Wait(node, WNOHANG) ) + break; + + printf("Stem waiting to destroy node: %s (%d)\n", + node->name.data(), node->pid); + sleep(kill_delay); + } + } + +std::string Stem::Revive() { constexpr auto attempts_before_delay_increase = 3; constexpr auto delay_increase_factor = 2; @@ -343,12 +443,14 @@ std::string StemState::Revive() if ( Spawn(&node) ) return node.name; + + ReportStatus(node); } return ""; } -bool StemState::Spawn(zeek::Supervisor::Node* node) +bool Stem::Spawn(zeek::Supervisor::Node* node) { auto node_pid = fork(); @@ -371,27 +473,7 @@ bool StemState::Spawn(zeek::Supervisor::Node* node) return false; } -std::vector StemState::ExtractMessages(std::string* buffer) const - { - std::vector rval; - - for ( ; ; ) - { - auto msg_end = buffer->find('\0'); - - if ( msg_end == std::string::npos ) - // Don't have any full messages left - break; - - auto msg = buffer->substr(0, msg_end); - rval.emplace_back(std::move(msg)); - buffer->erase(0, msg_end + 1); - } - - return rval; - } - -int StemState::AliveNodeCount() const +int Stem::AliveNodeCount() const { auto rval = 0; @@ -402,28 +484,16 @@ int StemState::AliveNodeCount() const return rval; } -void StemState::KillNodes(int signal) const +void Stem::KillNodes(int signal) const { for ( const auto& n : nodes ) - { - const auto& node = n.second; - auto kill_res = kill(node.pid, signal); - - if ( kill_res == -1 ) - { - char tmp[256]; - bro_strerror_r(errno, tmp, sizeof(tmp)); - fprintf(stderr, "Failed to send signal to node %s: %s", - node.name.data(), tmp); - } - } + KillNode(n.second, signal); } -void StemState::Shutdown(int exit_code) +void Stem::Shutdown(int exit_code) { constexpr auto max_term_attempts = 13; constexpr auto kill_delay = 2; - auto kill_attempts = 0; for ( ; ; ) @@ -431,6 +501,7 @@ void StemState::Shutdown(int exit_code) auto sig = kill_attempts++ < max_term_attempts ? 
SIGTERM : SIGKILL; printf("Stem killed nodes with signal %d\n", sig); KillNodes(sig); + usleep(10); Reap(); auto nodes_alive = AliveNodeCount(); @@ -459,136 +530,149 @@ void StemState::Shutdown(int exit_code) } } -std::string StemState::Run() +void Stem::ReportStatus(const zeek::Supervisor::Node& node) const { - // TODO: changing the process group here so that SIGINT to the - // supervisor doesn't also get passed to the children. i.e. supervisor - // should be in charge of initiating orderly shutdown. But calling - // just setpgid() like this is technically a race-condition -- need - // to do more work of blocking SIGINT before fork(), unblocking after, - // then also calling setpgid() from parent. And just not doing that - // until more is known whether that's the right SIGINT behavior in - // the first place. - auto res = setpgid(0, 0); - - if ( res == -1 ) - fprintf(stderr, "failed to set stem process group: %s\n", - strerror(errno)); + std::string msg = fmt("status %s %d", node.name.data(), node.pid); + safe_write(pipe->OutFD(), msg.data(), msg.size() + 1); + } +std::string Stem::Run() + { for ( ; ; ) { - pollfd fds[2] = { { pipe->InFD(), POLLIN, 0 }, - { signal_flare->FD(), POLLIN, 0} }; - constexpr auto poll_timeout_ms = 1000; - auto res = poll(fds, 2, poll_timeout_ms); + auto new_node_name = Poll(); - if ( res < 0 ) + if ( ! new_node_name.empty() ) + return new_node_name; + } + + return ""; + } + +std::string Stem::Poll() + { + pollfd fds[2] = { { pipe->InFD(), POLLIN, 0 }, + { signal_flare->FD(), POLLIN, 0} }; + constexpr auto poll_timeout_ms = 1000; + auto res = poll(fds, 2, poll_timeout_ms); + + if ( res < 0 ) + { + if ( errno != EINTR ) { - if ( errno != EINTR ) - { - fprintf(stderr, "poll() failed: %s\n", strerror(errno)); - continue; - } + fprintf(stderr, "Stem poll() failed: %s\n", strerror(errno)); + return {}; } + } - if ( getppid() == 1 ) - { - // TODO: better way to detect loss of parent than polling ? 
- printf("Stem suicide\n"); - Shutdown(13); - } + if ( getppid() == 1 ) + { + // TODO: better way to detect loss of parent than polling ? + // e.g. prctl(PR_SET_PDEATHSIG, ...) on Linux + // or procctl(PROC_PDEATHSIG_CTL) on FreeBSD + printf("Stem suicide\n"); + Shutdown(13); + } + auto new_node_name = Revive(); + + if ( ! new_node_name.empty() ) + return new_node_name; + + if ( res == 0 ) + return {}; + + if ( signal_flare->Extinguish() ) + { + if ( shutting_down ) + Shutdown(0); + + Reap(); auto new_node_name = Revive(); if ( ! new_node_name.empty() ) return new_node_name; - - // TODO: periodically send node status updates back to supervisor? - // e.g. can fill in information gaps in the supervisor's node map - // for things such as node PIDs. - - if ( res == 0 ) - continue; - - if ( signal_flare->Extinguish() ) - { - if ( shutting_down ) - Shutdown(0); - - Reap(); - auto new_node_name = Revive(); - - if ( ! new_node_name.empty() ) - return new_node_name; - } - - if ( ! fds[0].revents ) - continue; - - char buf[256]; - int bytes_read = read(pipe->InFD(), buf, 256); - - if ( bytes_read == 0 ) - // EOF - exit(0); - - if ( bytes_read < 0 ) - { - fprintf(stderr, "read() failed: %s\n", strerror(errno)); - continue; - } - - msg_buffer.append(buf, bytes_read); - auto msgs = ExtractMessages(&msg_buffer); - - for ( auto& msg : msgs ) - { - // TODO: improve message format ... 
- std::vector msg_tokens; - tokenize_string(std::move(msg), " ", &msg_tokens); - const auto& cmd = msg_tokens[0]; - const auto& node_name = msg_tokens[1]; - - if ( cmd == "create" ) - { - assert(nodes.find(node_name) == nodes.end()); - zeek::Supervisor::Node node; - node.name = node_name; - - if ( Spawn(&node) ) - // TODO: probably want to return the full configuration the - // new node ought to use - return node.name; - - // TODO: get stem printfs going through standard Zeek debug.log - printf("Stem created node: %s (%d)\n", node.name.data(), node.pid); - nodes.emplace(node_name, std::move(node)); - } - else if ( cmd == "destroy" ) - { - auto res = nodes.erase(node_name); - assert(res > 0 ); - printf("Stem destroying node: %s\n", node_name.data()); - // TODO: kill - } - else if ( cmd == "restart" ) - { - auto it = nodes.find(node_name); - assert(it != nodes.end()); - printf("Stem restarting node: %s\n", node_name.data()); - // TODO: re-use logic for destroy then create - } - else - fprintf(stderr, "unknown supervisor message: %s", cmd.data()); - } } + if ( ! fds[0].revents ) + return {}; + + char buf[256]; + int bytes_read = read(pipe->InFD(), buf, 256); + + if ( bytes_read == 0 ) + { + // EOF, supervisor must have exited + printf("Stem EOF\n"); + Shutdown(14); + } + + if ( bytes_read < 0 ) + { + fprintf(stderr, "Stem read() failed: %s\n", strerror(errno)); + return {}; + } + + msg_buffer.append(buf, bytes_read); + auto msgs = extract_messages(&msg_buffer); + + for ( auto& msg : msgs ) + { + // TODO: improve message format ... 
+ std::vector msg_tokens; + tokenize_string(std::move(msg), " ", &msg_tokens); + const auto& cmd = msg_tokens[0]; + const auto& node_name = msg_tokens[1]; + + if ( cmd == "create" ) + { + assert(nodes.find(node_name) == nodes.end()); + zeek::Supervisor::Node node; + node.name = node_name; + + if ( Spawn(&node) ) + // TODO: probably want to return the full configuration the + // new node ought to use + return node.name; + + // TODO: get stem printfs going through standard Zeek debug.log + printf("Stem created node: %s (%d)\n", node.name.data(), node.pid); + auto it = nodes.emplace(node_name, std::move(node)).first; + ReportStatus(it->second); + } + else if ( cmd == "destroy" ) + { + auto it = nodes.find(node_name); + assert(it != nodes.end()); + auto& node = it->second; + printf("Stem destroying node: %s\n", node_name.data()); + Destroy(&node); + nodes.erase(it); + } + else if ( cmd == "restart" ) + { + auto it = nodes.find(node_name); + assert(it != nodes.end()); + auto& node = it->second; + printf("Stem restarting node: %s\n", node_name.data()); + Destroy(&node); + + if ( Spawn(&node) ) + return node.name; + + ReportStatus(node); + } + else + fprintf(stderr, "unknown supervisor message: %s", cmd.data()); + } + + return {}; } std::string zeek::Supervisor::RunStem(std::unique_ptr pipe) { - StemState ss(std::move(pipe)); - return ss.Run(); + Stem s(std::move(pipe)); + return s.Run(); } static zeek::Supervisor::Node node_val_to_struct(const RecordVal* node) @@ -602,20 +686,20 @@ static RecordVal* node_struct_to_val(const zeek::Supervisor::Node& node) { auto rval = new RecordVal(BifType::Record::Supervisor::Node); rval->Assign(0, new StringVal(node.name)); + + if ( node.pid ) + rval->Assign(1, val_mgr->GetCount(node.pid)); + return rval; } RecordVal* zeek::Supervisor::Status(const std::string& node_name) { // TODO: handle node classes - // TODO: return real status information - static auto count = 0; auto rval = new RecordVal(BifType::Record::Supervisor::Status); - 
rval->Assign(0, val_mgr->GetCount(count++)); - auto tt = BifType::Record::Supervisor::Status->FieldType("nodes"); auto node_table_val = new TableVal(tt->AsTableType()); - rval->Assign(1, node_table_val); + rval->Assign(0, node_table_val); for ( const auto& n : nodes ) { @@ -633,6 +717,10 @@ std::string zeek::Supervisor::Create(const RecordVal* node_val) { auto node = node_val_to_struct(node_val); + if ( node.name.find(' ') != std::string::npos ) + return fmt("node names must not contain spaces: '%s'", + node.name.data()); + if ( nodes.find(node.name) != nodes.end() ) return fmt("node with name '%s' already exists", node.name.data()); diff --git a/src/Supervisor.h b/src/Supervisor.h index a9209f5471..5402c23f44 100644 --- a/src/Supervisor.h +++ b/src/Supervisor.h @@ -70,6 +70,7 @@ private: std::unique_ptr stem_pipe; bro::Flare signal_flare; std::map nodes; + std::string msg_buffer; }; extern Supervisor* supervisor; From de93a060e50cdba11aeae4de03ec7e99060266ae Mon Sep 17 00:00:00 2001 From: Jon Siwek Date: Sun, 20 Oct 2019 08:00:10 -0700 Subject: [PATCH 10/76] Improve supervisor signal handling --- src/Supervisor.cc | 173 ++++++++++++++++++++++------------------------ src/Supervisor.h | 2 + 2 files changed, 86 insertions(+), 89 deletions(-) diff --git a/src/Supervisor.cc b/src/Supervisor.cc index 008d1a309e..bb18c4243a 100644 --- a/src/Supervisor.cc +++ b/src/Supervisor.cc @@ -56,29 +56,26 @@ struct Stem { }; } -static Stem* stem_state = nullptr; +static Stem* stem = nullptr; -static RETSIGTYPE stem_sigchld_handler(int signo) +static RETSIGTYPE stem_sig_handler(int signo) { - printf("Stem received SIGCHLD signal: %d\n", signo); - stem_state->signal_flare->Fire(); - return RETSIGVAL; - } + printf("Stem received signal: %d\n", signo); -static RETSIGTYPE stem_sigterm_handler(int signo) - { - printf("Stem received SIGTERM signal: %d\n", signo); + if ( stem->shutting_down ) + return RETSIGVAL; - if ( ! 
stem_state->shutting_down ) - stem_state->signal_flare->Fire(); + stem->signal_flare->Fire(); + + if ( signo == SIGTERM ) + stem->shutting_down = true; - stem_state->shutting_down = true; return RETSIGVAL; } static RETSIGTYPE supervisor_sig_handler(int signo) { - DBG_LOG(DBG_SUPERVISOR, "received SIGCHLD signal: %d", signo); + DBG_LOG(DBG_SUPERVISOR, "received signal: %d", signo); zeek::supervisor->ObserveChildSignal(); return RETSIGVAL; } @@ -154,101 +151,100 @@ void zeek::Supervisor::ObserveChildSignal() signal_flare.Fire(); } -void zeek::Supervisor::HandleChildSignal() +void zeek::Supervisor::ReapStem() { if ( ! stem_pid ) return; - auto child_signals = signal_flare.Extinguish(); - - if ( ! child_signals ) - return; - - DBG_LOG(DBG_SUPERVISOR, "handle %d child signals, wait for stem pid %d", - child_signals, stem_pid); - int status; auto res = waitpid(stem_pid, &status, WNOHANG); if ( res == 0 ) - { - DBG_LOG(DBG_SUPERVISOR, "false alarm, stem process still lives"); - } - else if ( res == -1 ) + // Still alive + return; + + if ( res == -1 ) { char tmp[256]; bro_strerror_r(errno, tmp, sizeof(tmp)); reporter->Error("Supervisor failed to get exit status" " of stem process: %s", tmp); + return; + } + + stem_pid = 0; + + if ( WIFEXITED(status) ) + { + DBG_LOG(DBG_SUPERVISOR, "stem process exited with status %d", + WEXITSTATUS(status)); + } + else if ( WIFSIGNALED(status) ) + { + DBG_LOG(DBG_SUPERVISOR, "stem process terminated by signal %d", + WTERMSIG(status)); } else + reporter->Error("Supervisor failed to get exit status" + " of stem process for unknown reason"); + } + +void zeek::Supervisor::HandleChildSignal() + { + signal_flare.Extinguish(); + ReapStem(); + + if ( stem_pid ) + return; + + // Revive the Stem process + // TODO: Stem process needs a way to inform Supervisor not to revive + stem_pid = fork(); + + if ( stem_pid == -1 ) { stem_pid = 0; - - if ( WIFEXITED(status) ) - { - DBG_LOG(DBG_SUPERVISOR, "stem process exited with status %d", - 
WEXITSTATUS(status)); - } - else if ( WIFSIGNALED(status) ) - { - DBG_LOG(DBG_SUPERVISOR, "stem process terminated by signal %d", - WTERMSIG(status)); - } - else - reporter->Error("Supervisor failed to get exit status" - " of stem process for unknown reason"); + char tmp[256]; + bro_strerror_r(errno, tmp, sizeof(tmp)); + reporter->Error("failed to fork Zeek supervisor stem process: %s\n", tmp); + signal_flare.Fire(); + // Sleep to avoid spinning too fast in a revival-fail loop. + sleep(1); + return; } - if ( ! stem_pid ) + if ( stem_pid == 0 ) { - // Revive the Stem process - stem_pid = fork(); + // Child stem process needs to exec() + char stem_env[256]; + safe_snprintf(stem_env, sizeof(stem_env), "ZEEK_STEM=%d,%d,%d,%d", + stem_pipe->In().ReadFD(), stem_pipe->In().WriteFD(), + stem_pipe->Out().ReadFD(), stem_pipe->Out().WriteFD()); + char* env[] = { stem_env, (char*)0 }; + stem_pipe->In().UnsetFlags(FD_CLOEXEC); + stem_pipe->Out().UnsetFlags(FD_CLOEXEC); + auto res = execle(config.zeek_exe_path.data(), + config.zeek_exe_path.data(), + (char*)0, env); - if ( stem_pid == -1 ) - { - char tmp[256]; - bro_strerror_r(errno, tmp, sizeof(tmp)); - reporter->Error("failed to fork Zeek supervisor stem process: %s\n", tmp); - signal_flare.Fire(); - // Sleep to avoid spinning too fast in a revival-fail loop. 
- sleep(1); - } - else if ( stem_pid == 0 ) - { - char stem_env[256]; - safe_snprintf(stem_env, sizeof(stem_env), "ZEEK_STEM=%d,%d,%d,%d", - stem_pipe->In().ReadFD(), stem_pipe->In().WriteFD(), - stem_pipe->Out().ReadFD(), stem_pipe->Out().WriteFD()); - char* env[] = { stem_env, (char*)0 }; - stem_pipe->In().UnsetFlags(FD_CLOEXEC); - stem_pipe->Out().UnsetFlags(FD_CLOEXEC); - auto res = execle(config.zeek_exe_path.data(), - config.zeek_exe_path.data(), - (char*)0, env); - - char tmp[256]; - bro_strerror_r(errno, tmp, sizeof(tmp)); - fprintf(stderr, "failed to exec Zeek supervisor stem process: %s\n", tmp); - exit(1); - } - else - { - DBG_LOG(DBG_SUPERVISOR, "stem process revived, new pid: %d", stem_pid); - // Recreate the desired process hierarchy. - - // TODO: probably a preferred order in which to create nodes - // e.g. logger, manager, proxy, worker - for ( const auto& n : nodes ) - { - const auto& node = n.second; - std::string msg = fmt("create %s", node.name.data()); - safe_write(stem_pipe->OutFD(), msg.data(), msg.size() + 1); - } - } + char tmp[256]; + bro_strerror_r(errno, tmp, sizeof(tmp)); + fprintf(stderr, "failed to exec Zeek supervisor stem process: %s\n", tmp); + exit(1); } - // TODO: Stem process needs a way to inform Supervisor not to revive + // Parent supervisor process resends node configurations to recreate + // the desired process hierarchy + DBG_LOG(DBG_SUPERVISOR, "stem process revived, new pid: %d", stem_pid); + + // TODO: probably a preferred order in which to create nodes + // e.g. 
logger, manager, proxy, worker + for ( const auto& n : nodes ) + { + const auto& node = n.second; + std::string msg = fmt("create %s", node.name.data()); + safe_write(stem_pipe->OutFD(), msg.data(), msg.size() + 1); + } } @@ -301,9 +297,9 @@ Stem::Stem(std::unique_ptr p) { zeek::set_thread_name("zeek.stem"); pipe->Swap(); - stem_state = this; - setsignal(SIGCHLD, stem_sigchld_handler); - setsignal(SIGTERM, stem_sigterm_handler); + stem = this; + setsignal(SIGCHLD, stem_sig_handler); + setsignal(SIGTERM, stem_sig_handler); // TODO: changing the process group here so that SIGINT to the // supervisor doesn't also get passed to the children. i.e. supervisor @@ -318,7 +314,6 @@ Stem::Stem(std::unique_ptr p) if ( res == -1 ) fprintf(stderr, "failed to set stem process group: %s\n", strerror(errno)); - } Stem::~Stem() diff --git a/src/Supervisor.h b/src/Supervisor.h index 5402c23f44..04342d220e 100644 --- a/src/Supervisor.h +++ b/src/Supervisor.h @@ -62,6 +62,8 @@ private: void HandleChildSignal(); + void ReapStem(); + const char* Tag() override { return "zeek::Supervisor"; } From 7a6355f64fc60cfe1e2c589d2e2cd3b64590d608 Mon Sep 17 00:00:00 2001 From: Jon Siwek Date: Mon, 21 Oct 2019 09:12:50 -0700 Subject: [PATCH 11/76] Change supervisor event API to use strings for request IDs --- scripts/base/frameworks/supervisor/api.zeek | 16 ++++++++-------- scripts/base/frameworks/supervisor/main.zeek | 16 ++++++++-------- 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/scripts/base/frameworks/supervisor/api.zeek b/scripts/base/frameworks/supervisor/api.zeek index 9ebc9b7fa8..96cf5d3581 100644 --- a/scripts/base/frameworks/supervisor/api.zeek +++ b/scripts/base/frameworks/supervisor/api.zeek @@ -22,15 +22,15 @@ export { global Supervisor::stop_request: event(); - global Supervisor::status_request: event(id: count, nodes: string); - global Supervisor::status_response: event(id: count, result: Status); + global Supervisor::status_request: event(reqid: string, nodes: 
string); + global Supervisor::status_response: event(reqid: string, result: Status); - global Supervisor::create_request: event(id: count, node: Node); - global Supervisor::create_response: event(id: count, result: string); + global Supervisor::create_request: event(reqid: string, node: Node); + global Supervisor::create_response: event(reqid: string, result: string); - global Supervisor::destroy_request: event(id: count, nodes: string); - global Supervisor::destroy_response: event(id: count, result: bool); + global Supervisor::destroy_request: event(reqid: string, nodes: string); + global Supervisor::destroy_response: event(reqid: string, result: bool); - global Supervisor::restart_request: event(id: count, nodes: string); - global Supervisor::restart_response: event(id: count, result: bool); + global Supervisor::restart_request: event(reqid: string, nodes: string); + global Supervisor::restart_response: event(reqid: string, result: bool); } diff --git a/scripts/base/frameworks/supervisor/main.zeek b/scripts/base/frameworks/supervisor/main.zeek index ee5abe0818..ae48e415e6 100644 --- a/scripts/base/frameworks/supervisor/main.zeek +++ b/scripts/base/frameworks/supervisor/main.zeek @@ -21,32 +21,32 @@ event Supervisor::stop_request() terminate(); } -event Supervisor::status_request(id: count, nodes: string) +event Supervisor::status_request(reqid: string, nodes: string) { local res = Supervisor::status(nodes); local topic = Supervisor::topic_prefix + "/status_response"; - Broker::publish(topic, Supervisor::status_response, id, res); + Broker::publish(topic, Supervisor::status_response, reqid, res); } -event Supervisor::create_request(id: count, node: Node) +event Supervisor::create_request(reqid: string, node: Node) { local res = Supervisor::create(node); local topic = Supervisor::topic_prefix + "/create_response"; - Broker::publish(topic, Supervisor::create_response, id, res); + Broker::publish(topic, Supervisor::create_response, reqid, res); } -event 
Supervisor::destroy_request(id: count, nodes: string) +event Supervisor::destroy_request(reqid: string, nodes: string) { local res = Supervisor::destroy(nodes); local topic = Supervisor::topic_prefix + "/destroy_response"; - Broker::publish(topic, Supervisor::destroy_response, id, res); + Broker::publish(topic, Supervisor::destroy_response, reqid, res); } -event Supervisor::restart_request(id: count, nodes: string) +event Supervisor::restart_request(reqid: string, nodes: string) { local res = Supervisor::restart(nodes); local topic = Supervisor::topic_prefix + "/restart_response"; - Broker::publish(topic, Supervisor::restart_response, id, res); + Broker::publish(topic, Supervisor::restart_response, reqid, res); } function Supervisor::status(nodes: string): Status From 25a8ba99faf0015bd39ab98078d14e9def27fc21 Mon Sep 17 00:00:00 2001 From: Jon Siwek Date: Mon, 21 Oct 2019 09:16:26 -0700 Subject: [PATCH 12/76] Change supervisor event response topic names They now include the request ID at the end of their topic, to possibly help with subscription filtering. 
--- scripts/base/frameworks/supervisor/main.zeek | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/scripts/base/frameworks/supervisor/main.zeek b/scripts/base/frameworks/supervisor/main.zeek index ae48e415e6..b258be21a2 100644 --- a/scripts/base/frameworks/supervisor/main.zeek +++ b/scripts/base/frameworks/supervisor/main.zeek @@ -24,28 +24,28 @@ event Supervisor::stop_request() event Supervisor::status_request(reqid: string, nodes: string) { local res = Supervisor::status(nodes); - local topic = Supervisor::topic_prefix + "/status_response"; + local topic = Supervisor::topic_prefix + fmt("/status_response/%s", reqid); Broker::publish(topic, Supervisor::status_response, reqid, res); } event Supervisor::create_request(reqid: string, node: Node) { local res = Supervisor::create(node); - local topic = Supervisor::topic_prefix + "/create_response"; + local topic = Supervisor::topic_prefix + fmt("/create_response/%s", reqid); Broker::publish(topic, Supervisor::create_response, reqid, res); } event Supervisor::destroy_request(reqid: string, nodes: string) { local res = Supervisor::destroy(nodes); - local topic = Supervisor::topic_prefix + "/destroy_response"; + local topic = Supervisor::topic_prefix + fmt("/destroy_response/%s", reqid); Broker::publish(topic, Supervisor::destroy_response, reqid, res); } event Supervisor::restart_request(reqid: string, nodes: string) { local res = Supervisor::restart(nodes); - local topic = Supervisor::topic_prefix + "/restart_response"; + local topic = Supervisor::topic_prefix + fmt("/restart_response/%s", reqid); Broker::publish(topic, Supervisor::restart_response, reqid, res); } From 29f386e388150bd97ff3608271b146186f6c4bb5 Mon Sep 17 00:00:00 2001 From: Jon Siwek Date: Wed, 23 Oct 2019 17:37:53 -0700 Subject: [PATCH 13/76] Implement minimal supervised cluster configuration More aspects of the cluster configuration to get fleshed out later, but a basic cluster like one would use for a live deployment can now be 
instantiated and run under supervision. The new clusterized-pcap-processing supervisor mode is also not done yet. --- scripts/base/frameworks/cluster/__load__.zeek | 5 + scripts/base/frameworks/cluster/main.zeek | 8 +- scripts/base/frameworks/supervisor/api.zeek | 22 +- scripts/base/frameworks/supervisor/main.zeek | 6 +- src/Net.cc | 10 +- src/Net.h | 2 +- src/Supervisor.cc | 271 +++++++++++++++--- src/Supervisor.h | 22 +- src/main.cc | 49 ++-- src/supervisor.bif | 44 ++- src/util.cc | 36 ++- src/util.h | 6 +- 12 files changed, 390 insertions(+), 91 deletions(-) diff --git a/scripts/base/frameworks/cluster/__load__.zeek b/scripts/base/frameworks/cluster/__load__.zeek index 9effaf835a..a04d6744d2 100644 --- a/scripts/base/frameworks/cluster/__load__.zeek +++ b/scripts/base/frameworks/cluster/__load__.zeek @@ -18,7 +18,12 @@ redef Broker::log_topic = Cluster::rr_log_topic; # Loading the cluster framework requires that a script by this name exists # somewhere in the ZEEKPATH. The only thing in the file should be the # cluster definition in the :zeek:id:`Cluster::nodes` variable. + +@if ( ! Supervisor::__init_cluster() ) +# When running a supervised cluster, Cluster::nodes is instead populated +# from the internal C++-layer directly via the above BIF. @load cluster-layout +@endif @if ( Cluster::node in Cluster::nodes ) diff --git a/scripts/base/frameworks/cluster/main.zeek b/scripts/base/frameworks/cluster/main.zeek index 9040c663e1..c8ff1ce159 100644 --- a/scripts/base/frameworks/cluster/main.zeek +++ b/scripts/base/frameworks/cluster/main.zeek @@ -287,7 +287,13 @@ function is_enabled(): bool function local_node_type(): NodeType { - return is_enabled() ? nodes[node]$node_type : NONE; + if ( ! 
is_enabled() ) + return NONE; + + if ( node !in nodes ) + return NONE; + + return nodes[node]$node_type; } function node_topic(name: string): string diff --git a/scripts/base/frameworks/supervisor/api.zeek b/scripts/base/frameworks/supervisor/api.zeek index 96cf5d3581..7a13a5ce09 100644 --- a/scripts/base/frameworks/supervisor/api.zeek +++ b/scripts/base/frameworks/supervisor/api.zeek @@ -4,14 +4,32 @@ module Supervisor; export { + type ClusterRole: enum { + NONE, + LOGGER, + MANAGER, + PROXY, + WORKER, + }; + + type ClusterEndpoint: record { + role: ClusterRole; + host: addr; + p: port; + interface: string &optional; + }; + type Node: record { - # TODO: add proper config fields name: string; + interface: string &optional; + cluster: table[string] of ClusterEndpoint &default=table(); + + # TODO: separate node config fields from status fields ? + # TODO: add more status fields ? pid: count &optional; }; type Status: record { - # TODO: add more status fields ? nodes: table[string] of Node; }; diff --git a/scripts/base/frameworks/supervisor/main.zeek b/scripts/base/frameworks/supervisor/main.zeek index b258be21a2..6668524f9d 100644 --- a/scripts/base/frameworks/supervisor/main.zeek +++ b/scripts/base/frameworks/supervisor/main.zeek @@ -54,17 +54,17 @@ function Supervisor::status(nodes: string): Status return Supervisor::__status(nodes); } -function create(node: Node): string +function Supervisor::create(node: Node): string { return Supervisor::__create(node); } -function destroy(nodes: string): bool +function Supervisor::destroy(nodes: string): bool { return Supervisor::__destroy(nodes); } -function restart(nodes: string): bool +function Supervisor::restart(nodes: string): bool { return Supervisor::__restart(nodes); } diff --git a/src/Net.cc b/src/Net.cc index f5b15be9ba..0e2fad67e6 100644 --- a/src/Net.cc +++ b/src/Net.cc @@ -33,6 +33,7 @@ #include "iosource/PktDumper.h" #include "plugin/Manager.h" #include "broker/Manager.h" +#include "Supervisor.h" extern "C" { 
#include "setsignal.h" @@ -288,6 +289,9 @@ void net_run() while ( iosource_mgr->Size() || (BifConst::exit_only_after_terminate && ! terminating) ) { + if ( zeek::supervised_node && getppid() == 1 ) + zeek_terminate_loop("supervised cluster node was orphaned"); + double ts; iosource::IOSource* src = iosource_mgr->FindSoonest(&ts); @@ -361,13 +365,11 @@ void net_run() current_dispatched = 0; current_iosrc = 0; - // Should we put the signal handling into an IOSource? - extern void termination_signal(); - if ( signal_val == SIGTERM || signal_val == SIGINT ) // We received a signal while processing the // current packet and its related events. - termination_signal(); + // Should we put the signal handling into an IOSource? + zeek_terminate_loop("received termination signal"); if ( ! reading_traces ) // Check whether we have timers scheduled for diff --git a/src/Net.h b/src/Net.h index c841a1fde4..0a5c23a46c 100644 --- a/src/Net.h +++ b/src/Net.h @@ -24,7 +24,7 @@ extern void net_update_time(double new_network_time); extern void net_packet_dispatch(double t, const Packet* pkt, iosource::PktSrc* src_ps); extern void expire_timers(iosource::PktSrc* src_ps = 0); -extern void termination_signal(); +extern void zeek_terminate_loop(const char* reason); // Functions to temporarily suspend processing of live input (network packets // and remote events/state). Turning this is on is sure to lead to data loss! 
diff --git a/src/Supervisor.cc b/src/Supervisor.cc index bb18c4243a..069ed6ec77 100644 --- a/src/Supervisor.cc +++ b/src/Supervisor.cc @@ -14,6 +14,8 @@ #include "zeek-config.h" #include "util.h" +#include "3rdparty/json.hpp" + extern "C" { #include "setsignal.h" } @@ -24,14 +26,14 @@ struct Stem { ~Stem(); - std::string Run(); + zeek::Supervisor::Node* Run(); - std::string Poll(); + zeek::Supervisor::Node* Poll(); + + zeek::Supervisor::Node* Revive(); void Reap(); - std::string Revive(); - bool Spawn(zeek::Supervisor::Node* node); int AliveNodeCount() const; @@ -60,6 +62,7 @@ static Stem* stem = nullptr; static RETSIGTYPE stem_sig_handler(int signo) { + // TODO: signal safety printf("Stem received signal: %d\n", signo); if ( stem->shutting_down ) @@ -75,6 +78,7 @@ static RETSIGTYPE stem_sig_handler(int signo) static RETSIGTYPE supervisor_sig_handler(int signo) { + // TODO: signal safety DBG_LOG(DBG_SUPERVISOR, "received signal: %d", signo); zeek::supervisor->ObserveChildSignal(); return RETSIGVAL; @@ -191,8 +195,15 @@ void zeek::Supervisor::ReapStem() void zeek::Supervisor::HandleChildSignal() { - signal_flare.Extinguish(); - ReapStem(); + bool had_child_signal = signal_flare.Extinguish(); + + if ( had_child_signal ) + { + ReapStem(); + + DBG_LOG(DBG_SUPERVISOR, "processed SIGCHLD %s", + stem_pid ? "(spurious)" : ""); + } if ( stem_pid ) return; @@ -233,9 +244,10 @@ void zeek::Supervisor::HandleChildSignal() exit(1); } + DBG_LOG(DBG_SUPERVISOR, "stem process revived, new pid: %d", stem_pid); + // Parent supervisor process resends node configurations to recreate // the desired process hierarchy - DBG_LOG(DBG_SUPERVISOR, "stem process revived, new pid: %d", stem_pid); // TODO: probably a preferred order in which to create nodes // e.g. 
logger, manager, proxy, worker @@ -402,7 +414,7 @@ void Stem::Destroy(zeek::Supervisor::Node* node) const } } -std::string Stem::Revive() +zeek::Supervisor::Node* Stem::Revive() { constexpr auto attempts_before_delay_increase = 3; constexpr auto delay_increase_factor = 2; @@ -437,12 +449,12 @@ std::string Stem::Revive() node.revival_delay *= delay_increase_factor; if ( Spawn(&node) ) - return node.name; + return new zeek::Supervisor::Node(node); ReportStatus(node); } - return ""; + return {}; } bool Stem::Spawn(zeek::Supervisor::Node* node) @@ -494,10 +506,15 @@ void Stem::Shutdown(int exit_code) for ( ; ; ) { auto sig = kill_attempts++ < max_term_attempts ? SIGTERM : SIGKILL; - printf("Stem killed nodes with signal %d\n", sig); - KillNodes(sig); - usleep(10); - Reap(); + + if ( ! nodes.empty() ) + { + KillNodes(sig); + printf("Stem killed nodes with signal %d\n", sig); + usleep(10); + Reap(); + } + auto nodes_alive = AliveNodeCount(); if ( nodes_alive == 0 ) @@ -531,20 +548,20 @@ void Stem::ReportStatus(const zeek::Supervisor::Node& node) const safe_write(pipe->OutFD(), msg.data(), msg.size() + 1); } -std::string Stem::Run() +zeek::Supervisor::Node* Stem::Run() { for ( ; ; ) { - auto new_node_name = Poll(); + auto new_node = Poll(); - if ( ! new_node_name.empty() ) - return new_node_name; + if ( new_node ) + return new_node; } - return ""; + return {}; } -std::string Stem::Poll() +zeek::Supervisor::Node* Stem::Poll() { pollfd fds[2] = { { pipe->InFD(), POLLIN, 0 }, { signal_flare->FD(), POLLIN, 0} }; @@ -569,10 +586,10 @@ std::string Stem::Poll() Shutdown(13); } - auto new_node_name = Revive(); + auto new_node = Revive(); - if ( ! new_node_name.empty() ) - return new_node_name; + if ( new_node ) + return new_node; if ( res == 0 ) return {}; @@ -583,10 +600,10 @@ std::string Stem::Poll() Shutdown(0); Reap(); - auto new_node_name = Revive(); + auto new_node = Revive(); - if ( ! 
new_node_name.empty() ) - return new_node_name; + if ( new_node ) + return new_node; } if ( ! fds[0].revents ) @@ -615,20 +632,18 @@ std::string Stem::Poll() { // TODO: improve message format ... std::vector msg_tokens; - tokenize_string(std::move(msg), " ", &msg_tokens); + tokenize_string(std::move(msg), " ", &msg_tokens, 2); const auto& cmd = msg_tokens[0]; const auto& node_name = msg_tokens[1]; if ( cmd == "create" ) { + const auto& node_json = msg_tokens[2]; assert(nodes.find(node_name) == nodes.end()); - zeek::Supervisor::Node node; - node.name = node_name; + auto node = zeek::Supervisor::Node::FromJSON(node_json); if ( Spawn(&node) ) - // TODO: probably want to return the full configuration the - // new node ought to use - return node.name; + return new zeek::Supervisor::Node(node); // TODO: get stem printfs going through standard Zeek debug.log printf("Stem created node: %s (%d)\n", node.name.data(), node.pid); @@ -653,7 +668,7 @@ std::string Stem::Poll() Destroy(&node); if ( Spawn(&node) ) - return node.name; + return new zeek::Supervisor::Node(node); ReportStatus(node); } @@ -664,7 +679,7 @@ std::string Stem::Poll() return {}; } -std::string zeek::Supervisor::RunStem(std::unique_ptr pipe) +zeek::Supervisor::Node* zeek::Supervisor::RunStem(std::unique_ptr pipe) { Stem s(std::move(pipe)); return s.Run(); @@ -674,20 +689,192 @@ static zeek::Supervisor::Node node_val_to_struct(const RecordVal* node) { zeek::Supervisor::Node rval; rval.name = node->Lookup("name")->AsString()->CheckString(); + auto iface_val = node->Lookup("interface"); + + if ( iface_val ) + rval.interface = iface_val->AsString()->CheckString(); + + auto cluster_table_val = node->Lookup("cluster")->AsTableVal(); + auto cluster_table = cluster_table_val->AsTable(); + auto c = cluster_table->InitForIteration(); + HashKey* k; + TableEntryVal* v; + + while ( (v = cluster_table->NextEntry(k, c)) ) + { + auto key = cluster_table_val->RecoverIndex(k); + auto name = 
key->Index(0)->AsStringVal()->ToStdString(); + Unref(key); + auto rv = v->Value()->AsRecordVal(); + + zeek::Supervisor::ClusterEndpoint ep; + ep.role = static_cast(rv->Lookup("role")->AsEnum()); + ep.host = rv->Lookup("host")->AsAddr().AsString(); + ep.port = rv->Lookup("p")->AsPortVal()->Port(); + + auto iface = rv->Lookup("interface"); + + if ( iface ) + ep.interface = iface->AsStringVal()->ToStdString(); + + rval.cluster.emplace(name, std::move(ep)); + } + return rval; } static RecordVal* node_struct_to_val(const zeek::Supervisor::Node& node) { - auto rval = new RecordVal(BifType::Record::Supervisor::Node); - rval->Assign(0, new StringVal(node.name)); + auto rt = BifType::Record::Supervisor::Node; + auto rval = new RecordVal(rt); + rval->Assign(rt->FieldOffset("name"), new StringVal(node.name)); + + if ( ! node.interface.empty() ) + rval->Assign(rt->FieldOffset("interface"), + new StringVal(node.interface)); + + auto tt = BifType::Record::Supervisor::Node->FieldType("cluster"); + auto cluster_val = new TableVal(tt->AsTableType()); + rval->Assign(rt->FieldOffset("cluster"), cluster_val); + + for ( const auto& e : node.cluster ) + { + auto& name = e.first; + auto& ep = e.second; + auto key = new StringVal(name); + auto ept = BifType::Record::Supervisor::ClusterEndpoint; + auto val = new RecordVal(ept); + + val->Assign(ept->FieldOffset("role"), BifType::Enum::Supervisor::ClusterRole->GetVal(ep.role)); + val->Assign(ept->FieldOffset("host"), new AddrVal(ep.host)); + val->Assign(ept->FieldOffset("p"), val_mgr->GetPort(ep.port, TRANSPORT_TCP)); + + if ( ! 
ep.interface.empty() ) + val->Assign(ept->FieldOffset("interface"), new StringVal(ep.interface)); + + cluster_val->Assign(key, val); + Unref(key); + } if ( node.pid ) - rval->Assign(1, val_mgr->GetCount(node.pid)); + rval->Assign(rt->FieldOffset("pid"), val_mgr->GetCount(node.pid)); return rval; } +static BifEnum::Supervisor::ClusterRole role_str_to_enum(const std::string& r) /* Maps a "Supervisor::<ROLE>" string to the matching ClusterRole value; any other string yields NONE. */ + { + if ( r == "Supervisor::LOGGER" ) + return BifEnum::Supervisor::LOGGER; + if ( r == "Supervisor::MANAGER" ) + return BifEnum::Supervisor::MANAGER; + if ( r == "Supervisor::PROXY" ) + return BifEnum::Supervisor::PROXY; + if ( r == "Supervisor::WORKER" ) + return BifEnum::Supervisor::WORKER; + + return BifEnum::Supervisor::NONE; + } + +zeek::Supervisor::Node zeek::Supervisor::Node::FromJSON(const std::string& json) + { + zeek::Supervisor::Node rval; + auto j = nlohmann::json::parse(json); + rval.name = j["name"]; + + auto it = j.find("interface"); + + if ( it != j.end() ) + rval.interface = *it; + + auto cluster = j["cluster"]; + + for ( const auto& e : cluster.items() ) + { + Supervisor::ClusterEndpoint ep; + + auto& key = e.key(); + auto& val = e.value(); + + auto role_str = val["role"]; + ep.role = role_str_to_enum(role_str); + + ep.host = val["host"]; + ep.port = val["p"]["port"]; + + auto it = val.find("interface"); + + if ( it != val.end() ) + ep.interface = *it; + + rval.cluster.emplace(key, std::move(ep)); + } + + return rval; + } + +static Val* supervisor_role_to_cluster_node_type(BifEnum::Supervisor::ClusterRole role) + { + static auto node_type = global_scope()->Lookup("Cluster::NodeType")->AsType()->AsEnumType(); + + switch ( role ) { + case BifEnum::Supervisor::LOGGER: + return node_type->GetVal(node_type->Lookup("Cluster", "LOGGER")); + case BifEnum::Supervisor::MANAGER: + return node_type->GetVal(node_type->Lookup("Cluster", "MANAGER")); + case BifEnum::Supervisor::PROXY: + return node_type->GetVal(node_type->Lookup("Cluster", "PROXY")); + case BifEnum::Supervisor::WORKER: + 
return node_type->GetVal(node_type->Lookup("Cluster", "WORKER")); + default: + return node_type->GetVal(node_type->Lookup("Cluster", "NONE")); + } + } + +void zeek::Supervisor::Node::InitCluster() + { + auto cluster_node_type = global_scope()->Lookup("Cluster::Node")->AsType()->AsRecordType(); + auto cluster_nodes_id = global_scope()->Lookup("Cluster::nodes"); + auto cluster_manager_is_logger_id = global_scope()->Lookup("Cluster::manager_is_logger"); + auto cluster_nodes = cluster_nodes_id->ID_Val()->AsTableVal(); + auto has_logger = false; + std::string manager_name; + + for ( const auto& e : supervised_node->cluster ) + { + if ( e.second.role == BifEnum::Supervisor::MANAGER ) + manager_name = e.first; + else if ( e.second.role == BifEnum::Supervisor::LOGGER ) + has_logger = true; + } + + for ( const auto& e : supervised_node->cluster ) + { + const auto& node_name = e.first; + const auto& ep = e.second; + auto key = new StringVal(node_name); + auto val = new RecordVal(cluster_node_type); + + auto node_type = supervisor_role_to_cluster_node_type(ep.role); + val->Assign(cluster_node_type->FieldOffset("node_type"), node_type); + val->Assign(cluster_node_type->FieldOffset("ip"), new AddrVal(ep.host)); + val->Assign(cluster_node_type->FieldOffset("p"), val_mgr->GetPort(ep.port, TRANSPORT_TCP)); + + if ( ! ep.interface.empty() ) + val->Assign(cluster_node_type->FieldOffset("interface"), + new StringVal(ep.interface)); + + if ( ! manager_name.empty() && ep.role != BifEnum::Supervisor::MANAGER ) + val->Assign(cluster_node_type->FieldOffset("manager"), + new StringVal(manager_name)); + + cluster_nodes->Assign(key, val); + Unref(key); + } + + cluster_manager_is_logger_id->SetVal(val_mgr->GetBool(! 
has_logger)); + } + RecordVal* zeek::Supervisor::Status(const std::string& node_name) { // TODO: handle node classes @@ -708,7 +895,7 @@ RecordVal* zeek::Supervisor::Status(const std::string& node_name) return rval; } -std::string zeek::Supervisor::Create(const RecordVal* node_val) +std::string zeek::Supervisor::Create(RecordVal* node_val) { auto node = node_val_to_struct(node_val); @@ -719,7 +906,13 @@ std::string zeek::Supervisor::Create(const RecordVal* node_val) if ( nodes.find(node.name) != nodes.end() ) return fmt("node with name '%s' already exists", node.name.data()); - std::string msg = fmt("create %s", node.name.data()); + auto re = new RE_Matcher("^_"); + auto json_val = node_val->ToJSON(false, re); + auto json_str = json_val->ToStdString(); + delete re; + Unref(json_val); + + std::string msg = fmt("create %s %s", node.name.data(), json_str.data()); safe_write(stem_pipe->OutFD(), msg.data(), msg.size() + 1); nodes.emplace(node.name, node); return ""; diff --git a/src/Supervisor.h b/src/Supervisor.h index 04342d220e..62f7198656 100644 --- a/src/Supervisor.h +++ b/src/Supervisor.h @@ -12,22 +12,35 @@ #include "iosource/IOSource.h" #include "Pipe.h" #include "Flare.h" +#include "NetVar.h" namespace zeek { class Supervisor : public iosource::IOSource { public: - static std::string RunStem(std::unique_ptr pipe); - struct Config { int num_workers = 1; std::vector pcaps; std::string zeek_exe_path; }; + struct ClusterEndpoint { + BifEnum::Supervisor::ClusterRole role; + std::string host; + int port; + std::string interface; + }; + struct Node { + static Node FromJSON(const std::string& json); + + static void InitCluster(); + std::string name; + std::string interface; + std::map cluster; + pid_t pid = 0; int exit_status = 0; int signal_number = 0; @@ -36,6 +49,8 @@ public: std::chrono::time_point spawn_time; }; + static Node* RunStem(std::unique_ptr pipe); + Supervisor(Config cfg, std::unique_ptr stem_pipe, pid_t stem_pid); ~Supervisor(); @@ -46,7 +61,7 @@ 
public: void ObserveChildSignal(); RecordVal* Status(const std::string& node_name); - std::string Create(const RecordVal* node); + std::string Create(RecordVal* node); bool Destroy(const std::string& node_name); bool Restart(const std::string& node_name); @@ -76,5 +91,6 @@ private: }; extern Supervisor* supervisor; +extern Supervisor::Node* supervised_node; } // namespace zeek diff --git a/src/main.cc b/src/main.cc index abda0611b7..764b129344 100644 --- a/src/main.cc +++ b/src/main.cc @@ -94,6 +94,7 @@ zeekygen::Manager* zeekygen_mgr = 0; iosource::Manager* iosource_mgr = 0; bro_broker::Manager* broker_mgr = 0; zeek::Supervisor* zeek::supervisor = 0; +zeek::Supervisor::Node* zeek::supervised_node = 0; std::vector zeek_script_prefixes; Stmt* stmts; @@ -613,6 +614,7 @@ void terminate_bro() delete file_mgr; // broker_mgr is deleted via iosource_mgr // supervisor is deleted via iosource_mgr + delete zeek::supervised_node; delete iosource_mgr; delete log_mgr; delete reporter; @@ -623,11 +625,11 @@ void terminate_bro() reporter = 0; } -void termination_signal() +void zeek_terminate_loop(const char* reason) { - set_processing_status("TERMINATING", "termination_signal"); + set_processing_status("TERMINATING", reason); + reporter->Info("%s", reason); - reporter->Info("received termination signal"); net_get_final_stats(); done_with_network(); net_delete(); @@ -738,13 +740,13 @@ int main(int argc, char** argv) exit(0); } - bool use_supervisor = options.supervised_workers > 0; + auto use_supervisor = [&]() -> bool { return options.supervised_workers > 0; }; pid_t stem_pid = 0; std::unique_ptr supervisor_pipe; - std::string stem_spawn = ""; - if ( use_supervisor ) + if ( use_supervisor() ) { + // TODO: the SIGCHLD handler should be set before fork() supervisor_pipe.reset(new bro::PipePair{FD_CLOEXEC, O_NONBLOCK}); stem_pid = fork(); @@ -756,7 +758,7 @@ int main(int argc, char** argv) } if ( stem_pid == 0 ) - stem_spawn = zeek::Supervisor::RunStem(std::move(supervisor_pipe)); + 
zeek::supervised_node = zeek::Supervisor::RunStem(std::move(supervisor_pipe)); } auto zeek_stem_env = getenv("ZEEK_STEM"); @@ -779,22 +781,27 @@ int main(int argc, char** argv) fds[i] = std::stoi(fd_strings[i]); supervisor_pipe.reset(new bro::PipePair{FD_CLOEXEC, O_NONBLOCK, fds}); - stem_spawn = zeek::Supervisor::RunStem(std::move(supervisor_pipe)); + zeek::supervised_node = zeek::Supervisor::RunStem(std::move(supervisor_pipe)); } - if ( ! stem_spawn.empty() ) + if ( zeek::supervised_node ) { - for ( ; ; ) - { - // TODO: this no-op loop is here just to test the process hierarchy - printf("node wakeup: %s\n", stem_spawn.data()); - sleep(2); + // TODO: possibly can inherit some command-line options? + // In case stem gets revived via exec(), would need to pass along + // original arguments to it. + options = {}; + const auto& node_name = zeek::supervised_node->name; - // TODO: this re-parenting check needs to go somewhere proper - if ( getppid() == 1 ) + if ( ! zeek::supervised_node->interface.empty() ) + options.interfaces.emplace_back(zeek::supervised_node->interface); + + if ( ! zeek::supervised_node->cluster.empty() ) + { + if ( setenv("CLUSTER_NODE", node_name.data(), true) == -1 ) { - printf("node suicide: %s\n", stem_spawn.data()); - exit(13); + fprintf(stderr, "cluster node %s failed to setenv: %s\n", + node_name.data(), strerror(errno)); + exit(1); } } } @@ -862,7 +869,7 @@ int main(int argc, char** argv) if ( ! getenv("ZEEK_DEBUG_LOG_STDERR") ) { - if ( use_supervisor ) + if ( use_supervisor() ) debug_log_name = "debug-supervisor"; else debug_log_name = "debug"; @@ -872,7 +879,7 @@ int main(int argc, char** argv) } #endif - if ( use_supervisor ) + if ( use_supervisor() ) { zeek::Supervisor::Config cfg = {}; cfg.pcaps = options.pcap_files; @@ -939,7 +946,7 @@ int main(int argc, char** argv) options.interfaces.size() == 0 && options.identifier_to_print.empty() && ! command_line_policy && ! options.print_plugins && - ! use_supervisor ) + ! 
use_supervisor() && ! zeek::supervised_node ) add_input_file("-"); for ( const auto& script_option : options.script_options_to_set ) diff --git a/src/supervisor.bif b/src/supervisor.bif index a6188d4687..b6c7b24914 100644 --- a/src/supervisor.bif +++ b/src/supervisor.bif @@ -6,30 +6,72 @@ module Supervisor; +enum ClusterRole %{ + NONE, + LOGGER, + MANAGER, + PROXY, + WORKER, +%} + +type Supervisor::ClusterEndpoint: record; type Supervisor::Status: record; type Supervisor::Node: record; function Supervisor::__status%(nodes: string%): Supervisor::Status %{ + if ( ! zeek::supervisor ) + { + builtin_error("supervisor mode not enabled"); + return new RecordVal(BifType::Record::Supervisor::Status); + } + return zeek::supervisor->Status(nodes->CheckString()); %} function Supervisor::__create%(node: Supervisor::Node%): string %{ + if ( ! zeek::supervisor ) + { + builtin_error("supervisor mode not enabled"); + return new StringVal("supervisor mode not enabled"); + } + auto rval = zeek::supervisor->Create(node->AsRecordVal()); return new StringVal(rval); %} function Supervisor::__destroy%(nodes: string%): bool %{ + if ( ! zeek::supervisor ) + { + builtin_error("supervisor mode not enabled"); + return val_mgr->GetBool(false); + } + auto rval = zeek::supervisor->Destroy(nodes->CheckString()); return val_mgr->GetBool(rval); %} function Supervisor::__restart%(nodes: string%): bool %{ + if ( ! zeek::supervisor ) + { + builtin_error("supervisor mode not enabled"); + return val_mgr->GetBool(false); + } + auto rval = zeek::supervisor->Restart(nodes->CheckString()); return val_mgr->GetBool(rval); %} -# TODO: BIFs for "restart", "add", "remove" operations +function Supervisor::__init_cluster%(%): bool + %{ + if ( zeek::supervised_node && ! 
zeek::supervised_node->cluster.empty() ) + { + zeek::supervised_node->InitCluster(); + return val_mgr->GetBool(true); + } + + return val_mgr->GetBool(false); + %} diff --git a/src/util.cc b/src/util.cc index d4cfb42f9b..c16c4980de 100644 --- a/src/util.cc +++ b/src/util.cc @@ -1165,17 +1165,22 @@ string flatten_script_name(const string& name, const string& prefix) } vector* tokenize_string(string input, const string& delim, - vector* rval) + vector* rval, int limit) { if ( ! rval ) rval = new vector(); size_t n; + auto found = 0; while ( (n = input.find(delim)) != string::npos ) { + ++found; rval->push_back(input.substr(0, n)); input.erase(0, n + 1); + + if ( limit && found == limit ) + break; } rval->push_back(input); @@ -1456,7 +1461,7 @@ void terminate_processing() } extern const char* proc_status_file; -void _set_processing_status(const char* status) +void set_processing_status(const char* status, const char* reason) { if ( ! proc_status_file ) return; @@ -1483,20 +1488,27 @@ void _set_processing_status(const char* status) return; } - int len = strlen(status); - while ( len ) + auto write_str = [](int fd, const char* s) { - int n = write(fd, status, len); + int len = strlen(s); + while ( len ) + { + int n = write(fd, s, len); - if ( n < 0 && errno != EINTR && errno != EAGAIN ) - // Ignore errors, as they're too difficult to - // safely report here. - break; + if ( n < 0 && errno != EINTR && errno != EAGAIN ) + // Ignore errors, as they're too difficult to + // safely report here. 
+ break; - status += n; - len -= n; - } + s += n; + len -= n; + } + }; + write_str(fd, status); + write_str(fd, " ["); + write_str(fd, reason); + write_str(fd, "]\n"); safe_close(fd); errno = old_errno; diff --git a/src/util.h b/src/util.h index 07d33bbef7..bf4204d585 100644 --- a/src/util.h +++ b/src/util.h @@ -147,7 +147,7 @@ inline std::string get_escaped_string(const std::string& str, bool escape_all) std::vector* tokenize_string(std::string input, const std::string& delim, - std::vector* rval = 0); + std::vector* rval = 0, int limit = 0); extern char* copy_string(const char* s); extern int streq(const char* s1, const char* s2); @@ -411,9 +411,7 @@ void terminate_processing(); // Sets the current status of the Bro process to the given string. // If the option --status-file has been set, this is written into // the the corresponding file. Otherwise, the function is a no-op. -#define set_processing_status(status, location) \ - _set_processing_status(status " [" location "]\n"); -void _set_processing_status(const char* status); +void set_processing_status(const char* status, const char* reason); // Current timestamp, from a networking perspective, not a wall-clock // perspective. In particular, if we're reading from a savefile this From c43ffc14fe54473a6c4d4253b9e04e2abe9a85d8 Mon Sep 17 00:00:00 2001 From: Jon Siwek Date: Tue, 5 Nov 2019 14:12:55 -0800 Subject: [PATCH 14/76] Fix misc. 
Supervisor bugs - Stem environment wasn't inherited if it needs to be exec()'d - Fix node creation message format when reviving Stem via exec() - Cluster node type for workers was set wrong --- src/Supervisor.cc | 170 ++++++++++++++++++++++++++-------------------- src/Supervisor.h | 7 +- 2 files changed, 101 insertions(+), 76 deletions(-) diff --git a/src/Supervisor.cc b/src/Supervisor.cc index 069ed6ec77..f3b517c7ae 100644 --- a/src/Supervisor.cc +++ b/src/Supervisor.cc @@ -104,6 +104,12 @@ static std::vector extract_messages(std::string* buffer) return rval; } +static std::string make_create_message(const zeek::Supervisor::Node& node) + { + auto json_str = node.ToJSON(); + return fmt("create %s %s", node.name.data(), json_str.data()); + } + zeek::Supervisor::Supervisor(zeek::Supervisor::Config cfg, std::unique_ptr pipe, pid_t arg_stem_pid) @@ -227,20 +233,25 @@ void zeek::Supervisor::HandleChildSignal() if ( stem_pid == 0 ) { // Child stem process needs to exec() - char stem_env[256]; - safe_snprintf(stem_env, sizeof(stem_env), "ZEEK_STEM=%d,%d,%d,%d", + auto stem_env = fmt("%d,%d,%d,%d", stem_pipe->In().ReadFD(), stem_pipe->In().WriteFD(), stem_pipe->Out().ReadFD(), stem_pipe->Out().WriteFD()); - char* env[] = { stem_env, (char*)0 }; + + if ( setenv("ZEEK_STEM", stem_env, true) == -1 ) + { + fprintf(stderr, "setenv(ZEEK_STEM) failed: %s\n", + strerror(errno)); + exit(1); + } + stem_pipe->In().UnsetFlags(FD_CLOEXEC); stem_pipe->Out().UnsetFlags(FD_CLOEXEC); - auto res = execle(config.zeek_exe_path.data(), - config.zeek_exe_path.data(), - (char*)0, env); + auto res = execl(config.zeek_exe_path.data(), + config.zeek_exe_path.data(), + (char*)0); - char tmp[256]; - bro_strerror_r(errno, tmp, sizeof(tmp)); - fprintf(stderr, "failed to exec Zeek supervisor stem process: %s\n", tmp); + fprintf(stderr, "failed to exec Zeek supervisor stem process: %s\n", + strerror(errno)); exit(1); } @@ -254,12 +265,11 @@ void zeek::Supervisor::HandleChildSignal() for ( const auto& n : 
nodes ) { const auto& node = n.second; - std::string msg = fmt("create %s", node.name.data()); + auto msg = make_create_message(node); safe_write(stem_pipe->OutFD(), msg.data(), msg.size() + 1); } } - void zeek::Supervisor::GetFds(iosource::FD_Set* read, iosource::FD_Set* write, iosource::FD_Set* except) { @@ -685,7 +695,21 @@ zeek::Supervisor::Node* zeek::Supervisor::RunStem(std::unique_ptr return s.Run(); } -static zeek::Supervisor::Node node_val_to_struct(const RecordVal* node) +static BifEnum::Supervisor::ClusterRole role_str_to_enum(const std::string& r) + { + if ( r == "Supervisor::LOGGER" ) + return BifEnum::Supervisor::LOGGER; + if ( r == "Supervisor::MANAGER" ) + return BifEnum::Supervisor::MANAGER; + if ( r == "Supervisor::PROXY" ) + return BifEnum::Supervisor::PROXY; + if ( r == "Supervisor::WORKER" ) + return BifEnum::Supervisor::WORKER; + + return BifEnum::Supervisor::NONE; + } + +zeek::Supervisor::Node zeek::Supervisor::Node::FromRecord(const RecordVal* node) { zeek::Supervisor::Node rval; rval.name = node->Lookup("name")->AsString()->CheckString(); @@ -723,59 +747,6 @@ static zeek::Supervisor::Node node_val_to_struct(const RecordVal* node) return rval; } -static RecordVal* node_struct_to_val(const zeek::Supervisor::Node& node) - { - auto rt = BifType::Record::Supervisor::Node; - auto rval = new RecordVal(rt); - rval->Assign(rt->FieldOffset("name"), new StringVal(node.name)); - - if ( ! 
node.interface.empty() ) - rval->Assign(rt->FieldOffset("interface"), - new StringVal(node.interface)); - - auto tt = BifType::Record::Supervisor::Node->FieldType("cluster"); - auto cluster_val = new TableVal(tt->AsTableType()); - rval->Assign(rt->FieldOffset("cluster"), cluster_val); - - for ( const auto& e : node.cluster ) - { - auto& name = e.first; - auto& ep = e.second; - auto key = new StringVal(name); - auto ept = BifType::Record::Supervisor::ClusterEndpoint; - auto val = new RecordVal(ept); - - val->Assign(ept->FieldOffset("role"), BifType::Enum::Supervisor::ClusterRole->GetVal(ep.role)); - val->Assign(ept->FieldOffset("host"), new AddrVal(ep.host)); - val->Assign(ept->FieldOffset("p"), val_mgr->GetPort(ep.port, TRANSPORT_TCP)); - - if ( ! ep.interface.empty() ) - val->Assign(ept->FieldOffset("interface"), new StringVal(ep.interface)); - - cluster_val->Assign(key, val); - Unref(key); - } - - if ( node.pid ) - rval->Assign(rt->FieldOffset("pid"), val_mgr->GetCount(node.pid)); - - return rval; - } - -static BifEnum::Supervisor::ClusterRole role_str_to_enum(const std::string& r) - { - if ( r == "Supervisor::LOGGER" ) - return BifEnum::Supervisor::LOGGER; - if ( r == "Supervisor::MANAGER" ) - return BifEnum::Supervisor::MANAGER; - if ( r == "Supervisor::PROXY" ) - return BifEnum::Supervisor::PROXY; - if ( r == "Supervisor::WORKER" ) - return BifEnum::Supervisor::LOGGER; - - return BifEnum::Supervisor::NONE; - } - zeek::Supervisor::Node zeek::Supervisor::Node::FromJSON(const std::string& json) { zeek::Supervisor::Node rval; @@ -813,6 +784,57 @@ zeek::Supervisor::Node zeek::Supervisor::Node::FromJSON(const std::string& json) return rval; } +std::string zeek::Supervisor::Node::ToJSON() const + { + auto re = new RE_Matcher("^_"); + auto node_val = ToRecord(); + auto json_val = node_val->ToJSON(false, re); + auto rval = json_val->ToStdString(); + delete re; + Unref(node_val); + Unref(json_val); + return rval; + } + +RecordVal* zeek::Supervisor::Node::ToRecord() 
const + { + auto rt = BifType::Record::Supervisor::Node; + auto rval = new RecordVal(rt); + rval->Assign(rt->FieldOffset("name"), new StringVal(name)); + + if ( ! interface.empty() ) + rval->Assign(rt->FieldOffset("interface"), + new StringVal(interface)); + + auto tt = BifType::Record::Supervisor::Node->FieldType("cluster"); + auto cluster_val = new TableVal(tt->AsTableType()); + rval->Assign(rt->FieldOffset("cluster"), cluster_val); + + for ( const auto& e : cluster ) + { + auto& name = e.first; + auto& ep = e.second; + auto key = new StringVal(name); + auto ept = BifType::Record::Supervisor::ClusterEndpoint; + auto val = new RecordVal(ept); + + val->Assign(ept->FieldOffset("role"), BifType::Enum::Supervisor::ClusterRole->GetVal(ep.role)); + val->Assign(ept->FieldOffset("host"), new AddrVal(ep.host)); + val->Assign(ept->FieldOffset("p"), val_mgr->GetPort(ep.port, TRANSPORT_TCP)); + + if ( ! ep.interface.empty() ) + val->Assign(ept->FieldOffset("interface"), new StringVal(ep.interface)); + + cluster_val->Assign(key, val); + Unref(key); + } + + if ( pid ) + rval->Assign(rt->FieldOffset("pid"), val_mgr->GetCount(pid)); + + return rval; + } + static Val* supervisor_role_to_cluster_node_type(BifEnum::Supervisor::ClusterRole role) { static auto node_type = global_scope()->Lookup("Cluster::NodeType")->AsType()->AsEnumType(); @@ -887,7 +909,7 @@ RecordVal* zeek::Supervisor::Status(const std::string& node_name) { const auto& node = n.second; auto key = new StringVal(node.name); - auto val = node_struct_to_val(node); + auto val = node.ToRecord(); node_table_val->Assign(key, val); Unref(key); } @@ -895,10 +917,14 @@ RecordVal* zeek::Supervisor::Status(const std::string& node_name) return rval; } -std::string zeek::Supervisor::Create(RecordVal* node_val) +std::string zeek::Supervisor::Create(const RecordVal* node_val) { - auto node = node_val_to_struct(node_val); + auto node = zeek::Supervisor::Node::FromRecord(node_val); + return Create(node); + } +std::string 
zeek::Supervisor::Create(const zeek::Supervisor::Node& node) + { if ( node.name.find(' ') != std::string::npos ) return fmt("node names must not contain spaces: '%s'", node.name.data()); @@ -906,13 +932,7 @@ std::string zeek::Supervisor::Create(RecordVal* node_val) if ( nodes.find(node.name) != nodes.end() ) return fmt("node with name '%s' already exists", node.name.data()); - auto re = new RE_Matcher("^_"); - auto json_val = node_val->ToJSON(false, re); - auto json_str = json_val->ToStdString(); - delete re; - Unref(json_val); - - std::string msg = fmt("create %s %s", node.name.data(), json_str.data()); + auto msg = make_create_message(node); safe_write(stem_pipe->OutFD(), msg.data(), msg.size() + 1); nodes.emplace(node.name, node); return ""; diff --git a/src/Supervisor.h b/src/Supervisor.h index 62f7198656..198a751913 100644 --- a/src/Supervisor.h +++ b/src/Supervisor.h @@ -33,10 +33,14 @@ public: }; struct Node { + static Node FromRecord(const RecordVal* node_val); static Node FromJSON(const std::string& json); static void InitCluster(); + std::string ToJSON() const; + RecordVal* ToRecord() const; + std::string name; std::string interface; std::map cluster; @@ -61,7 +65,8 @@ public: void ObserveChildSignal(); RecordVal* Status(const std::string& node_name); - std::string Create(RecordVal* node); + std::string Create(const RecordVal* node); + std::string Create(const Supervisor::Node& node); bool Destroy(const std::string& node_name); bool Restart(const std::string& node_name); From 8aa77436f9acbf9cc6f317cc588fa5935e07a98b Mon Sep 17 00:00:00 2001 From: Jon Siwek Date: Tue, 5 Nov 2019 19:29:43 -0800 Subject: [PATCH 15/76] Refactor misc. Supervisor code E.g. mostly to start taking advantage of C++17 things like std::optional. Also IntrusivePtr. 
--- src/Net.cc | 7 ++- src/Net.h | 6 +- src/Supervisor.cc | 144 ++++++++++++++++++++++------------------------ src/Supervisor.h | 8 ++- src/main.cc | 72 +++++++++++------------ 5 files changed, 116 insertions(+), 121 deletions(-) diff --git a/src/Net.cc b/src/Net.cc index 0e2fad67e6..1009f5d3de 100644 --- a/src/Net.cc +++ b/src/Net.cc @@ -148,7 +148,8 @@ void net_update_time(double new_network_time) void net_init(const std::vector& interfaces, const std::vector& pcap_input_files, - const std::string& pcap_output_file, bool do_watchdog) + const std::optional& pcap_output_file, + bool do_watchdog) { if ( ! pcap_input_files.empty() ) { @@ -189,9 +190,9 @@ void net_init(const std::vector& interfaces, // a timer. reading_traces = reading_live = 0; - if ( ! pcap_output_file.empty() ) + if ( pcap_output_file ) { - const char* writefile = pcap_output_file.data(); + const char* writefile = pcap_output_file->data(); pkt_dumper = iosource_mgr->OpenPktDumper(writefile, false); assert(pkt_dumper); diff --git a/src/Net.h b/src/Net.h index 0a5c23a46c..dab0014dd5 100644 --- a/src/Net.h +++ b/src/Net.h @@ -4,6 +4,7 @@ #include #include +#include #include "net_util.h" #include "util.h" @@ -14,8 +15,9 @@ #include "iosource/PktDumper.h" extern void net_init(const std::vector& interfaces, - const std::vector& pcap_input_files, - const std::string& pcap_output_file, bool do_watchdog); + const std::vector& pcap_input_files, + const std::optional& pcap_output_file, + bool do_watchdog); extern void net_run(); extern void net_get_final_stats(); extern void net_finish(int drain_events); diff --git a/src/Supervisor.cc b/src/Supervisor.cc index f3b517c7ae..bec830237b 100644 --- a/src/Supervisor.cc +++ b/src/Supervisor.cc @@ -20,39 +20,41 @@ extern "C" { #include "setsignal.h" } +using namespace zeek; + namespace { struct Stem { Stem(std::unique_ptr p); ~Stem(); - zeek::Supervisor::Node* Run(); + Supervisor::Node* Run(); - zeek::Supervisor::Node* Poll(); + Supervisor::Node* Poll(); - 
zeek::Supervisor::Node* Revive(); + Supervisor::Node* Revive(); void Reap(); - bool Spawn(zeek::Supervisor::Node* node); + bool Spawn(Supervisor::Node* node); int AliveNodeCount() const; void KillNodes(int signal) const; - void KillNode(const zeek::Supervisor::Node& node, int signal) const; + void KillNode(const Supervisor::Node& node, int signal) const; - void Destroy(zeek::Supervisor::Node* node) const; + void Destroy(Supervisor::Node* node) const; - bool Wait(zeek::Supervisor::Node* node, int options) const; + bool Wait(Supervisor::Node* node, int options) const; void Shutdown(int exit_code); - void ReportStatus(const zeek::Supervisor::Node& node) const; + void ReportStatus(const Supervisor::Node& node) const; std::unique_ptr signal_flare; std::unique_ptr pipe; - std::map nodes; + std::map nodes; std::string msg_buffer; bool shutting_down = false; }; @@ -80,7 +82,7 @@ static RETSIGTYPE supervisor_sig_handler(int signo) { // TODO: signal safety DBG_LOG(DBG_SUPERVISOR, "received signal: %d", signo); - zeek::supervisor->ObserveChildSignal(); + supervisor->ObserveChildSignal(); return RETSIGVAL; } @@ -104,13 +106,13 @@ static std::vector extract_messages(std::string* buffer) return rval; } -static std::string make_create_message(const zeek::Supervisor::Node& node) +static std::string make_create_message(const Supervisor::Node& node) { auto json_str = node.ToJSON(); return fmt("create %s %s", node.name.data(), json_str.data()); } -zeek::Supervisor::Supervisor(zeek::Supervisor::Config cfg, +Supervisor::Supervisor(Supervisor::Config cfg, std::unique_ptr pipe, pid_t arg_stem_pid) : config(std::move(cfg)), stem_pid(arg_stem_pid), stem_pipe(std::move(pipe)) @@ -121,7 +123,7 @@ zeek::Supervisor::Supervisor(zeek::Supervisor::Config cfg, SetIdle(true); } -zeek::Supervisor::~Supervisor() +Supervisor::~Supervisor() { setsignal(SIGCHLD, SIG_DFL); @@ -156,12 +158,12 @@ zeek::Supervisor::~Supervisor() } } -void zeek::Supervisor::ObserveChildSignal() +void 
Supervisor::ObserveChildSignal() { signal_flare.Fire(); } -void zeek::Supervisor::ReapStem() +void Supervisor::ReapStem() { if ( ! stem_pid ) return; @@ -199,7 +201,7 @@ void zeek::Supervisor::ReapStem() " of stem process for unknown reason"); } -void zeek::Supervisor::HandleChildSignal() +void Supervisor::HandleChildSignal() { bool had_child_signal = signal_flare.Extinguish(); @@ -270,19 +272,19 @@ void zeek::Supervisor::HandleChildSignal() } } -void zeek::Supervisor::GetFds(iosource::FD_Set* read, iosource::FD_Set* write, +void Supervisor::GetFds(iosource::FD_Set* read, iosource::FD_Set* write, iosource::FD_Set* except) { read->Insert(signal_flare.FD()); read->Insert(stem_pipe->InFD()); } -double zeek::Supervisor::NextTimestamp(double* local_network_time) +double Supervisor::NextTimestamp(double* local_network_time) { return timer_mgr->Time(); } -void zeek::Supervisor::Process() +void Supervisor::Process() { HandleChildSignal(); @@ -357,7 +359,7 @@ void Stem::Reap() } } -bool Stem::Wait(zeek::Supervisor::Node* node, int options) const +bool Stem::Wait(Supervisor::Node* node, int options) const { int status; auto res = waitpid(node->pid, &status, options); @@ -394,7 +396,7 @@ bool Stem::Wait(zeek::Supervisor::Node* node, int options) const return true; } -void Stem::KillNode(const zeek::Supervisor::Node& node, int signal) const +void Stem::KillNode(const Supervisor::Node& node, int signal) const { auto kill_res = kill(node.pid, signal); @@ -403,7 +405,7 @@ void Stem::KillNode(const zeek::Supervisor::Node& node, int signal) const node.name.data(), strerror(errno)); } -void Stem::Destroy(zeek::Supervisor::Node* node) const +void Stem::Destroy(Supervisor::Node* node) const { constexpr auto max_term_attempts = 13; constexpr auto kill_delay = 2; @@ -424,7 +426,7 @@ void Stem::Destroy(zeek::Supervisor::Node* node) const } } -zeek::Supervisor::Node* Stem::Revive() +Supervisor::Node* Stem::Revive() { constexpr auto attempts_before_delay_increase = 3; constexpr auto 
delay_increase_factor = 2; @@ -459,7 +461,7 @@ zeek::Supervisor::Node* Stem::Revive() node.revival_delay *= delay_increase_factor; if ( Spawn(&node) ) - return new zeek::Supervisor::Node(node); + return new Supervisor::Node(node); ReportStatus(node); } @@ -467,7 +469,7 @@ zeek::Supervisor::Node* Stem::Revive() return {}; } -bool Stem::Spawn(zeek::Supervisor::Node* node) +bool Stem::Spawn(Supervisor::Node* node) { auto node_pid = fork(); @@ -552,13 +554,13 @@ void Stem::Shutdown(int exit_code) } } -void Stem::ReportStatus(const zeek::Supervisor::Node& node) const +void Stem::ReportStatus(const Supervisor::Node& node) const { std::string msg = fmt("status %s %d", node.name.data(), node.pid); safe_write(pipe->OutFD(), msg.data(), msg.size() + 1); } -zeek::Supervisor::Node* Stem::Run() +Supervisor::Node* Stem::Run() { for ( ; ; ) { @@ -571,7 +573,7 @@ zeek::Supervisor::Node* Stem::Run() return {}; } -zeek::Supervisor::Node* Stem::Poll() +Supervisor::Node* Stem::Poll() { pollfd fds[2] = { { pipe->InFD(), POLLIN, 0 }, { signal_flare->FD(), POLLIN, 0} }; @@ -650,10 +652,10 @@ zeek::Supervisor::Node* Stem::Poll() { const auto& node_json = msg_tokens[2]; assert(nodes.find(node_name) == nodes.end()); - auto node = zeek::Supervisor::Node::FromJSON(node_json); + auto node = Supervisor::Node::FromJSON(node_json); if ( Spawn(&node) ) - return new zeek::Supervisor::Node(node); + return new Supervisor::Node(node); // TODO: get stem printfs going through standard Zeek debug.log printf("Stem created node: %s (%d)\n", node.name.data(), node.pid); @@ -678,7 +680,7 @@ zeek::Supervisor::Node* Stem::Poll() Destroy(&node); if ( Spawn(&node) ) - return new zeek::Supervisor::Node(node); + return new Supervisor::Node(node); ReportStatus(node); } @@ -689,7 +691,7 @@ zeek::Supervisor::Node* Stem::Poll() return {}; } -zeek::Supervisor::Node* zeek::Supervisor::RunStem(std::unique_ptr pipe) +Supervisor::Node* Supervisor::RunStem(std::unique_ptr pipe) { Stem s(std::move(pipe)); return s.Run(); @@ 
-709,9 +711,9 @@ static BifEnum::Supervisor::ClusterRole role_str_to_enum(const std::string& r) return BifEnum::Supervisor::NONE; } -zeek::Supervisor::Node zeek::Supervisor::Node::FromRecord(const RecordVal* node) +Supervisor::Node Supervisor::Node::FromRecord(const RecordVal* node) { - zeek::Supervisor::Node rval; + Supervisor::Node rval; rval.name = node->Lookup("name")->AsString()->CheckString(); auto iface_val = node->Lookup("interface"); @@ -726,12 +728,11 @@ zeek::Supervisor::Node zeek::Supervisor::Node::FromRecord(const RecordVal* node) while ( (v = cluster_table->NextEntry(k, c)) ) { - auto key = cluster_table_val->RecoverIndex(k); + IntrusivePtr key{cluster_table_val->RecoverIndex(k), false}; auto name = key->Index(0)->AsStringVal()->ToStdString(); - Unref(key); auto rv = v->Value()->AsRecordVal(); - zeek::Supervisor::ClusterEndpoint ep; + Supervisor::ClusterEndpoint ep; ep.role = static_cast(rv->Lookup("role")->AsEnum()); ep.host = rv->Lookup("host")->AsAddr().AsString(); ep.port = rv->Lookup("p")->AsPortVal()->Port(); @@ -747,9 +748,9 @@ zeek::Supervisor::Node zeek::Supervisor::Node::FromRecord(const RecordVal* node) return rval; } -zeek::Supervisor::Node zeek::Supervisor::Node::FromJSON(const std::string& json) +Supervisor::Node Supervisor::Node::FromJSON(const std::string& json) { - zeek::Supervisor::Node rval; + Supervisor::Node rval; auto j = nlohmann::json::parse(json); rval.name = j["name"]; @@ -784,27 +785,23 @@ zeek::Supervisor::Node zeek::Supervisor::Node::FromJSON(const std::string& json) return rval; } -std::string zeek::Supervisor::Node::ToJSON() const +std::string Supervisor::Node::ToJSON() const { - auto re = new RE_Matcher("^_"); + auto re = std::make_unique("^_"); auto node_val = ToRecord(); - auto json_val = node_val->ToJSON(false, re); + IntrusivePtr json_val{node_val->ToJSON(false, re.get()), false}; auto rval = json_val->ToStdString(); - delete re; - Unref(node_val); - Unref(json_val); return rval; } -RecordVal* 
zeek::Supervisor::Node::ToRecord() const +IntrusivePtr Supervisor::Node::ToRecord() const { auto rt = BifType::Record::Supervisor::Node; - auto rval = new RecordVal(rt); + auto rval = make_intrusive(rt); rval->Assign(rt->FieldOffset("name"), new StringVal(name)); - if ( ! interface.empty() ) - rval->Assign(rt->FieldOffset("interface"), - new StringVal(interface)); + if ( interface ) + rval->Assign(rt->FieldOffset("interface"), new StringVal(*interface)); auto tt = BifType::Record::Supervisor::Node->FieldType("cluster"); auto cluster_val = new TableVal(tt->AsTableType()); @@ -814,19 +811,18 @@ RecordVal* zeek::Supervisor::Node::ToRecord() const { auto& name = e.first; auto& ep = e.second; - auto key = new StringVal(name); + auto key = make_intrusive(name); auto ept = BifType::Record::Supervisor::ClusterEndpoint; - auto val = new RecordVal(ept); + auto val = make_intrusive(ept); val->Assign(ept->FieldOffset("role"), BifType::Enum::Supervisor::ClusterRole->GetVal(ep.role)); val->Assign(ept->FieldOffset("host"), new AddrVal(ep.host)); val->Assign(ept->FieldOffset("p"), val_mgr->GetPort(ep.port, TRANSPORT_TCP)); - if ( ! 
ep.interface.empty() ) - val->Assign(ept->FieldOffset("interface"), new StringVal(ep.interface)); + if ( ep.interface ) + val->Assign(ept->FieldOffset("interface"), new StringVal(*ep.interface)); - cluster_val->Assign(key, val); - Unref(key); + cluster_val->Assign(key.get(), val.detach()); } if ( pid ) @@ -853,14 +849,14 @@ static Val* supervisor_role_to_cluster_node_type(BifEnum::Supervisor::ClusterRol } } -void zeek::Supervisor::Node::InitCluster() +void Supervisor::Node::InitCluster() { auto cluster_node_type = global_scope()->Lookup("Cluster::Node")->AsType()->AsRecordType(); auto cluster_nodes_id = global_scope()->Lookup("Cluster::nodes"); auto cluster_manager_is_logger_id = global_scope()->Lookup("Cluster::manager_is_logger"); auto cluster_nodes = cluster_nodes_id->ID_Val()->AsTableVal(); auto has_logger = false; - std::string manager_name; + std::optional manager_name; for ( const auto& e : supervised_node->cluster ) { @@ -874,30 +870,29 @@ void zeek::Supervisor::Node::InitCluster() { const auto& node_name = e.first; const auto& ep = e.second; - auto key = new StringVal(node_name); - auto val = new RecordVal(cluster_node_type); + auto key = make_intrusive(node_name); + auto val = make_intrusive(cluster_node_type); auto node_type = supervisor_role_to_cluster_node_type(ep.role); val->Assign(cluster_node_type->FieldOffset("node_type"), node_type); val->Assign(cluster_node_type->FieldOffset("ip"), new AddrVal(ep.host)); val->Assign(cluster_node_type->FieldOffset("p"), val_mgr->GetPort(ep.port, TRANSPORT_TCP)); - if ( ! ep.interface.empty() ) + if ( ep.interface ) val->Assign(cluster_node_type->FieldOffset("interface"), - new StringVal(ep.interface)); + new StringVal(*ep.interface)); - if ( ! 
manager_name.empty() && ep.role != BifEnum::Supervisor::MANAGER ) + if ( manager_name && ep.role != BifEnum::Supervisor::MANAGER ) val->Assign(cluster_node_type->FieldOffset("manager"), - new StringVal(manager_name)); + new StringVal(*manager_name)); - cluster_nodes->Assign(key, val); - Unref(key); + cluster_nodes->Assign(key.get(), val.detach()); } cluster_manager_is_logger_id->SetVal(val_mgr->GetBool(! has_logger)); } -RecordVal* zeek::Supervisor::Status(const std::string& node_name) +RecordVal* Supervisor::Status(const std::string& node_name) { // TODO: handle node classes auto rval = new RecordVal(BifType::Record::Supervisor::Status); @@ -908,22 +903,21 @@ RecordVal* zeek::Supervisor::Status(const std::string& node_name) for ( const auto& n : nodes ) { const auto& node = n.second; - auto key = new StringVal(node.name); + auto key = make_intrusive(node.name); auto val = node.ToRecord(); - node_table_val->Assign(key, val); - Unref(key); + node_table_val->Assign(key.get(), val.detach()); } return rval; } -std::string zeek::Supervisor::Create(const RecordVal* node_val) +std::string Supervisor::Create(const RecordVal* node_val) { - auto node = zeek::Supervisor::Node::FromRecord(node_val); + auto node = Supervisor::Node::FromRecord(node_val); return Create(node); } -std::string zeek::Supervisor::Create(const zeek::Supervisor::Node& node) +std::string Supervisor::Create(const Supervisor::Node& node) { if ( node.name.find(' ') != std::string::npos ) return fmt("node names must not contain spaces: '%s'", @@ -938,7 +932,7 @@ std::string zeek::Supervisor::Create(const zeek::Supervisor::Node& node) return ""; } -bool zeek::Supervisor::Destroy(const std::string& node_name) +bool Supervisor::Destroy(const std::string& node_name) { // TODO: handle node classes @@ -950,7 +944,7 @@ bool zeek::Supervisor::Destroy(const std::string& node_name) return true; } -bool zeek::Supervisor::Restart(const std::string& node_name) +bool Supervisor::Restart(const std::string& node_name) { // 
TODO: handle node classes diff --git a/src/Supervisor.h b/src/Supervisor.h index 198a751913..543ff8a89e 100644 --- a/src/Supervisor.h +++ b/src/Supervisor.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include #include #include @@ -13,6 +14,7 @@ #include "Pipe.h" #include "Flare.h" #include "NetVar.h" +#include "IntrusivePtr.h" namespace zeek { @@ -29,7 +31,7 @@ public: BifEnum::Supervisor::ClusterRole role; std::string host; int port; - std::string interface; + std::optional interface; }; struct Node { @@ -39,10 +41,10 @@ public: static void InitCluster(); std::string ToJSON() const; - RecordVal* ToRecord() const; + IntrusivePtr ToRecord() const; std::string name; - std::string interface; + std::optional interface; std::map cluster; pid_t pid = 0; diff --git a/src/main.cc b/src/main.cc index 764b129344..091a3ac33d 100644 --- a/src/main.cc +++ b/src/main.cc @@ -9,6 +9,7 @@ #include #include #include +#include #ifdef HAVE_GETOPT_H #include #endif @@ -219,11 +220,11 @@ struct zeek_options { bool print_signature_debug_info = false; int print_plugins = 0; - std::string debug_log_streams; - std::string debug_script_tracing_file; + std::optional debug_log_streams; + std::optional debug_script_tracing_file; - std::string identifier_to_print; - std::string script_code_to_exec; + std::optional identifier_to_print; + std::optional script_code_to_exec; std::vector script_prefixes = { "" }; // "" = "no prefix" int supervised_workers = 0; @@ -239,16 +240,16 @@ struct zeek_options { bool perftools_check_leaks = false; bool perftools_profile = false; - std::string pcap_filter; + std::optional pcap_filter; std::vector interfaces; std::vector pcap_files; std::vector signature_files; - std::string pcap_output_file; - std::string random_seed_input_file; - std::string random_seed_output_file; - std::string process_status_file; - std::string zeekygen_config_file; + std::optional pcap_output_file; + std::optional random_seed_input_file; + std::optional random_seed_output_file; + 
std::optional process_status_file; + std::optional zeekygen_config_file; std::string libidmef_dtd_file = "idmef-message.dtd"; std::set plugins_to_load; @@ -329,12 +330,7 @@ static zeek_options parse_cmdline(int argc, char** argv) rval.debug_scripts = true; break; case 'e': - if ( optarg[0] == 0 ) - // Cheating a bit, but allows checking for an empty string - // to determine whether -e was used or not. - rval.script_code_to_exec = " "; - else - rval.script_code_to_exec = optarg; + rval.script_code_to_exec = optarg; break; case 'f': rval.pcap_filter = optarg; @@ -681,7 +677,7 @@ static std::vector get_script_signature_files() return rval; } -static std::string get_exe_path(std::string invocation) +static std::string get_exe_path(const std::string& invocation) { if ( invocation.empty() ) return ""; @@ -792,8 +788,8 @@ int main(int argc, char** argv) options = {}; const auto& node_name = zeek::supervised_node->name; - if ( ! zeek::supervised_node->interface.empty() ) - options.interfaces.emplace_back(zeek::supervised_node->interface); + if ( zeek::supervised_node->interface ) + options.interfaces.emplace_back(*zeek::supervised_node->interface); if ( ! zeek::supervised_node->cluster.empty() ) { @@ -838,17 +834,17 @@ int main(int argc, char** argv) fprintf(stderr, "Zeek script debugging ON.\n"); } - if ( ! options.script_code_to_exec.empty() ) - command_line_policy = options.script_code_to_exec.data(); + if ( options.script_code_to_exec ) + command_line_policy = options.script_code_to_exec->data(); - if ( ! options.debug_script_tracing_file.empty() ) + if ( options.debug_script_tracing_file ) { - g_trace_state.SetTraceFile(options.debug_script_tracing_file.data()); + g_trace_state.SetTraceFile(options.debug_script_tracing_file->data()); g_trace_state.TraceOn(); } - if ( ! 
options.process_status_file.empty() ) - proc_status_file = options.process_status_file.data(); + if ( options.process_status_file ) + proc_status_file = options.process_status_file->data(); atexit(atexit_handler); set_processing_status("INITIALIZING", "main"); @@ -862,9 +858,9 @@ int main(int argc, char** argv) plugin_mgr = new plugin::Manager(); #ifdef DEBUG - if ( ! options.debug_log_streams.empty() ) + if ( options.debug_log_streams ) { - debug_logger.EnableStreams(options.debug_log_streams.data()); + debug_logger.EnableStreams(options.debug_log_streams->data()); const char* debug_log_name = nullptr; if ( ! getenv("ZEEK_DEBUG_LOG_STDERR") ) @@ -896,11 +892,11 @@ int main(int argc, char** argv) const char* seed_load_file = zeekenv("ZEEK_SEED_FILE"); - if ( ! options.random_seed_input_file.empty() ) - seed_load_file = options.random_seed_input_file.data(); + if ( options.random_seed_input_file ) + seed_load_file = options.random_seed_input_file->data(); init_random_seed((seed_load_file && *seed_load_file ? seed_load_file : 0), - options.random_seed_output_file.empty() ? 0 : options.random_seed_output_file.data()); + options.random_seed_output_file ? options.random_seed_output_file->data() : 0); // DEBUG_MSG("HMAC key: %s\n", md5_digest_print(shared_hmac_md5_key)); init_hash_function(); @@ -929,8 +925,8 @@ int main(int argc, char** argv) timer_mgr = new PQ_TimerMgr(""); // timer_mgr = new CQ_TimerMgr(); - zeekygen_mgr = new zeekygen::Manager(options.zeekygen_config_file, - bro_argv[0]); + auto zeekygen_cfg = options.zeekygen_config_file.value_or(""); + zeekygen_mgr = new zeekygen::Manager(zeekygen_cfg, bro_argv[0]); add_essential_input_file("base/init-bare.zeek"); add_essential_input_file("base/init-frameworks-and-bifs.zeek"); @@ -944,7 +940,7 @@ int main(int argc, char** argv) options.script_options_to_set.empty() && options.pcap_files.size() == 0 && options.interfaces.size() == 0 && - options.identifier_to_print.empty() && + ! options.identifier_to_print && ! 
command_line_policy && ! options.print_plugins && ! use_supervisor() && ! zeek::supervised_node ) add_input_file("-"); @@ -1077,14 +1073,14 @@ int main(int argc, char** argv) reporter->InitOptions(); zeekygen_mgr->GenerateDocs(); - if ( ! options.pcap_filter.empty() ) + if ( options.pcap_filter ) { ID* id = global_scope()->Lookup("cmd_line_bpf_filter"); if ( ! id ) reporter->InternalError("global cmd_line_bpf_filter not defined"); - id->SetVal(new StringVal(options.pcap_filter)); + id->SetVal(new StringVal(*options.pcap_filter)); } auto all_signature_files = options.signature_files; @@ -1164,11 +1160,11 @@ int main(int argc, char** argv) } // Print the ID. - if ( ! options.identifier_to_print.empty() ) + if ( options.identifier_to_print ) { - ID* id = global_scope()->Lookup(options.identifier_to_print); + ID* id = global_scope()->Lookup(*options.identifier_to_print); if ( ! id ) - reporter->FatalError("No such ID: %s\n", options.identifier_to_print.data()); + reporter->FatalError("No such ID: %s\n", options.identifier_to_print->data()); ODesc desc; desc.SetQuotes(true); From aaa702fb4d6ba5015db1767825541dba1be61310 Mon Sep 17 00:00:00 2001 From: Jon Siwek Date: Mon, 6 Jan 2020 18:39:14 -0800 Subject: [PATCH 16/76] Add option to change supervised node's working directory --- scripts/base/frameworks/supervisor/api.zeek | 1 + src/Supervisor.cc | 26 +++++++++++++++++---- src/Supervisor.h | 1 + src/main.cc | 12 ++++++++++ 4 files changed, 36 insertions(+), 4 deletions(-) diff --git a/scripts/base/frameworks/supervisor/api.zeek b/scripts/base/frameworks/supervisor/api.zeek index 7a13a5ce09..0c2ad8285c 100644 --- a/scripts/base/frameworks/supervisor/api.zeek +++ b/scripts/base/frameworks/supervisor/api.zeek @@ -22,6 +22,7 @@ export { type Node: record { name: string; interface: string &optional; + directory: string &optional; cluster: table[string] of ClusterEndpoint &default=table(); # TODO: separate node config fields from status fields ? 
diff --git a/src/Supervisor.cc b/src/Supervisor.cc index bec830237b..135795e0d3 100644 --- a/src/Supervisor.cc +++ b/src/Supervisor.cc @@ -2,7 +2,7 @@ #include #include #include -#include +#include #include #include @@ -720,6 +720,11 @@ Supervisor::Node Supervisor::Node::FromRecord(const RecordVal* node) if ( iface_val ) rval.interface = iface_val->AsString()->CheckString(); + auto directory_val = node->Lookup("directory"); + + if ( directory_val ) + rval.directory = directory_val->AsString()->CheckString(); + auto cluster_table_val = node->Lookup("cluster")->AsTableVal(); auto cluster_table = cluster_table_val->AsTable(); auto c = cluster_table->InitForIteration(); @@ -754,11 +759,12 @@ Supervisor::Node Supervisor::Node::FromJSON(const std::string& json) auto j = nlohmann::json::parse(json); rval.name = j["name"]; - auto it = j.find("interface"); - - if ( it != j.end() ) + if ( auto it = j.find("interface"); it != j.end() ) rval.interface = *it; + if ( auto it = j.find("directory"); it != j.end() ) + rval.directory= *it; + auto cluster = j["cluster"]; for ( const auto& e : cluster.items() ) @@ -803,6 +809,9 @@ IntrusivePtr Supervisor::Node::ToRecord() const if ( interface ) rval->Assign(rt->FieldOffset("interface"), new StringVal(*interface)); + if ( directory ) + rval->Assign(rt->FieldOffset("directory"), new StringVal(*directory)); + auto tt = BifType::Record::Supervisor::Node->FieldType("cluster"); auto cluster_val = new TableVal(tt->AsTableType()); rval->Assign(rt->FieldOffset("cluster"), cluster_val); @@ -926,6 +935,15 @@ std::string Supervisor::Create(const Supervisor::Node& node) if ( nodes.find(node.name) != nodes.end() ) return fmt("node with name '%s' already exists", node.name.data()); + if ( node.directory ) + { + auto res = ensure_intermediate_dirs(node.directory->data()); + + if ( ! 
res ) + return fmt("failed to create working directory %s\n", + node.directory->data()); + } + auto msg = make_create_message(node); safe_write(stem_pipe->OutFD(), msg.data(), msg.size() + 1); nodes.emplace(node.name, node); diff --git a/src/Supervisor.h b/src/Supervisor.h index 543ff8a89e..52e4bef016 100644 --- a/src/Supervisor.h +++ b/src/Supervisor.h @@ -45,6 +45,7 @@ public: std::string name; std::optional interface; + std::optional directory; std::map cluster; pid_t pid = 0; diff --git a/src/main.cc b/src/main.cc index 091a3ac33d..77121d453e 100644 --- a/src/main.cc +++ b/src/main.cc @@ -800,6 +800,18 @@ int main(int argc, char** argv) exit(1); } } + + if ( zeek::supervised_node->directory ) + { + if ( chdir(zeek::supervised_node->directory->data()) ) + { + fprintf(stderr, "supervised node %s failed to chdir to %s: %s\n", + node_name.data(), + zeek::supervised_node->directory->data(), + strerror(errno)); + exit(1); + } + } } std::set_new_handler(bro_new_handler); From b114766205da69ebc87a50e7f4a2fddccc50d2ae Mon Sep 17 00:00:00 2001 From: Jon Siwek Date: Tue, 7 Jan 2020 10:23:25 -0800 Subject: [PATCH 17/76] Add Supervisor::is_supervised() And use it to avoid redef'ing the log rotation postprocessor to "archive-log" by default since it's unlikely PATH is configured to find that script. --- scripts/base/frameworks/cluster/nodes/logger.zeek | 3 +++ scripts/base/frameworks/supervisor/api.zeek | 2 ++ scripts/base/frameworks/supervisor/main.zeek | 5 +++++ src/supervisor.bif | 5 +++++ 4 files changed, 15 insertions(+) diff --git a/scripts/base/frameworks/cluster/nodes/logger.zeek b/scripts/base/frameworks/cluster/nodes/logger.zeek index 6fb5d09208..a56d4a4d66 100644 --- a/scripts/base/frameworks/cluster/nodes/logger.zeek +++ b/scripts/base/frameworks/cluster/nodes/logger.zeek @@ -23,4 +23,7 @@ redef Log::default_rotation_interval = 1 hrs; redef Log::default_mail_alarms_interval = 24 hrs; ## Use the cluster's archive logging script. + +@if ( ! 
Supervisor::is_supervised() ) redef Log::default_rotation_postprocessor_cmd = "archive-log"; +@endif diff --git a/scripts/base/frameworks/supervisor/api.zeek b/scripts/base/frameworks/supervisor/api.zeek index 0c2ad8285c..e8ecb1d726 100644 --- a/scripts/base/frameworks/supervisor/api.zeek +++ b/scripts/base/frameworks/supervisor/api.zeek @@ -39,6 +39,8 @@ export { global destroy: function(nodes: string): bool; global restart: function(nodes: string &default="all"): bool; + global is_supervised: function(): bool; + global Supervisor::stop_request: event(); global Supervisor::status_request: event(reqid: string, nodes: string); diff --git a/scripts/base/frameworks/supervisor/main.zeek b/scripts/base/frameworks/supervisor/main.zeek index 6668524f9d..83c1c28787 100644 --- a/scripts/base/frameworks/supervisor/main.zeek +++ b/scripts/base/frameworks/supervisor/main.zeek @@ -68,3 +68,8 @@ function Supervisor::restart(nodes: string): bool { return Supervisor::__restart(nodes); } + +function is_supervised(): bool + { + return Supervisor::__is_supervised(); + } diff --git a/src/supervisor.bif b/src/supervisor.bif index b6c7b24914..c1d9b12b1c 100644 --- a/src/supervisor.bif +++ b/src/supervisor.bif @@ -75,3 +75,8 @@ function Supervisor::__init_cluster%(%): bool return val_mgr->GetBool(false); %} + +function Supervisor::__is_supervised%(%): bool + %{ + return val_mgr->GetBool(zeek::supervised_node != nullptr); + %} From 22431dbecb16582e9ace895860157d92b92c0c80 Mon Sep 17 00:00:00 2001 From: Jon Siwek Date: Tue, 7 Jan 2020 11:59:39 -0800 Subject: [PATCH 18/76] Use string_view in Supervisor API where possible --- src/Supervisor.cc | 26 ++++++++++++++++++-------- src/Supervisor.h | 11 ++++++----- 2 files changed, 24 insertions(+), 13 deletions(-) diff --git a/src/Supervisor.cc b/src/Supervisor.cc index 135795e0d3..a9063513b1 100644 --- a/src/Supervisor.cc +++ b/src/Supervisor.cc @@ -2,10 +2,12 @@ #include #include #include -#include #include #include +#include +#include + 
#include "Supervisor.h" #include "Reporter.h" #include "DebugLogger.h" @@ -753,7 +755,7 @@ Supervisor::Node Supervisor::Node::FromRecord(const RecordVal* node) return rval; } -Supervisor::Node Supervisor::Node::FromJSON(const std::string& json) +Supervisor::Node Supervisor::Node::FromJSON(std::string_view json) { Supervisor::Node rval; auto j = nlohmann::json::parse(json); @@ -901,7 +903,7 @@ void Supervisor::Node::InitCluster() cluster_manager_is_logger_id->SetVal(val_mgr->GetBool(! has_logger)); } -RecordVal* Supervisor::Status(const std::string& node_name) +RecordVal* Supervisor::Status(std::string_view node_name) { // TODO: handle node classes auto rval = new RecordVal(BifType::Record::Supervisor::Status); @@ -950,26 +952,34 @@ std::string Supervisor::Create(const Supervisor::Node& node) return ""; } -bool Supervisor::Destroy(const std::string& node_name) +bool Supervisor::Destroy(std::string_view node_name) { // TODO: handle node classes - if ( ! nodes.erase(node_name) ) + auto it = nodes.find(node_name); + + if ( it == nodes.end() ) return false; - std::string msg = fmt("destroy %s", node_name.data()); + nodes.erase(it); + + std::stringstream ss; + ss << "destroy " << node_name; + std::string msg = ss.str(); safe_write(stem_pipe->OutFD(), msg.data(), msg.size() + 1); return true; } -bool Supervisor::Restart(const std::string& node_name) +bool Supervisor::Restart(std::string_view node_name) { // TODO: handle node classes if ( nodes.find(node_name) == nodes.end() ) return false; - std::string msg = fmt("restart %s", node_name.data()); + std::stringstream ss; + ss << "restart " << node_name; + std::string msg = ss.str(); safe_write(stem_pipe->OutFD(), msg.data(), msg.size() + 1); return true; } diff --git a/src/Supervisor.h b/src/Supervisor.h index 52e4bef016..994a65a381 100644 --- a/src/Supervisor.h +++ b/src/Supervisor.h @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include @@ -36,7 +37,7 @@ public: struct Node { static Node 
FromRecord(const RecordVal* node_val); - static Node FromJSON(const std::string& json); + static Node FromJSON(std::string_view json); static void InitCluster(); @@ -67,11 +68,11 @@ public: void ObserveChildSignal(); - RecordVal* Status(const std::string& node_name); + RecordVal* Status(std::string_view node_name); std::string Create(const RecordVal* node); std::string Create(const Supervisor::Node& node); - bool Destroy(const std::string& node_name); - bool Restart(const std::string& node_name); + bool Destroy(std::string_view node_name); + bool Restart(std::string_view node_name); private: @@ -94,7 +95,7 @@ private: pid_t stem_pid; std::unique_ptr stem_pipe; bro::Flare signal_flare; - std::map nodes; + std::map> nodes; std::string msg_buffer; }; From 0e19520e139b708e315bff30fb4462406c7e9680 Mon Sep 17 00:00:00 2001 From: Jon Siwek Date: Tue, 7 Jan 2020 20:42:43 -0800 Subject: [PATCH 19/76] Fix Supervisor memory leak --- src/Supervisor.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Supervisor.cc b/src/Supervisor.cc index a9063513b1..135d6adfc7 100644 --- a/src/Supervisor.cc +++ b/src/Supervisor.cc @@ -736,6 +736,7 @@ Supervisor::Node Supervisor::Node::FromRecord(const RecordVal* node) while ( (v = cluster_table->NextEntry(k, c)) ) { IntrusivePtr key{cluster_table_val->RecoverIndex(k), false}; + delete k; auto name = key->Index(0)->AsStringVal()->ToStdString(); auto rv = v->Value()->AsRecordVal(); From 297317b232ca53f5b052d19dc870bc2cdd16343c Mon Sep 17 00:00:00 2001 From: Jon Siwek Date: Wed, 8 Jan 2020 14:05:19 -0800 Subject: [PATCH 20/76] Organize command-line options for Supervisor filtering/inheritance Also have stem process execv() with original command-line arguments so that they're re-parsed and inherited correctly by supervised-nodes in the event the stem process needs to be re-created. 
--- scripts/base/frameworks/supervisor/api.zeek | 1 + scripts/base/frameworks/supervisor/main.zeek | 5 + src/Supervisor.cc | 11 +- src/main.cc | 114 ++++++++++++++----- src/supervisor.bif | 5 + 5 files changed, 103 insertions(+), 33 deletions(-) diff --git a/scripts/base/frameworks/supervisor/api.zeek b/scripts/base/frameworks/supervisor/api.zeek index e8ecb1d726..0426eb618f 100644 --- a/scripts/base/frameworks/supervisor/api.zeek +++ b/scripts/base/frameworks/supervisor/api.zeek @@ -39,6 +39,7 @@ export { global destroy: function(nodes: string): bool; global restart: function(nodes: string &default="all"): bool; + global is_supervisor: function(): bool; global is_supervised: function(): bool; global Supervisor::stop_request: event(); diff --git a/scripts/base/frameworks/supervisor/main.zeek b/scripts/base/frameworks/supervisor/main.zeek index 83c1c28787..b98fd59847 100644 --- a/scripts/base/frameworks/supervisor/main.zeek +++ b/scripts/base/frameworks/supervisor/main.zeek @@ -69,6 +69,11 @@ function Supervisor::restart(nodes: string): bool return Supervisor::__restart(nodes); } +function is_supervisor(): bool + { + return Supervisor::__is_supervisor(); + } + function is_supervised(): bool { return Supervisor::__is_supervised(); diff --git a/src/Supervisor.cc b/src/Supervisor.cc index 135d6adfc7..005f631590 100644 --- a/src/Supervisor.cc +++ b/src/Supervisor.cc @@ -250,10 +250,15 @@ void Supervisor::HandleChildSignal() stem_pipe->In().UnsetFlags(FD_CLOEXEC); stem_pipe->Out().UnsetFlags(FD_CLOEXEC); - auto res = execl(config.zeek_exe_path.data(), - config.zeek_exe_path.data(), - (char*)0); + char** args = new char*[bro_argc + 1]; + args[0] = config.zeek_exe_path.data(); + args[bro_argc] = nullptr; + + for ( auto i = 1; i < bro_argc; ++i ) + args[i] = bro_argv[i]; + + auto res = execv(config.zeek_exe_path.data(), args); fprintf(stderr, "failed to exec Zeek supervisor stem process: %s\n", strerror(errno)); exit(1); diff --git a/src/main.cc b/src/main.cc index 
a2d3da0409..498a0b428a 100644 --- a/src/main.cc +++ b/src/main.cc @@ -264,6 +264,65 @@ struct zeek_options { std::set plugins_to_load; std::vector scripts_to_load; std::vector script_options_to_set; + + /** + * Unset options that aren't meant to be used by the supervisor, but may + * make sense for supervised nodes to inherit (as opposed to flagging + * as an error an exiting outright if used in supervisor-mode). + */ + void filter_supervisor_options() + { + pcap_filter = {}; + interfaces = {}; + pcap_files = {}; + signature_files = {}; + pcap_output_file = {}; + } + + /** + * Inherit certain options set in the original supervisor parent process + * and discard the rest. + * @param node the supervised-node whose Zeek options are to be modified. + */ + void filter_supervised_node_options(zeek::Supervisor::Node* node) + { + auto og = *this; + *this = {}; + + debug_log_streams = og.debug_log_streams; + debug_script_tracing_file = og.debug_script_tracing_file; + script_code_to_exec = og.script_code_to_exec; + script_prefixes = og.script_prefixes; + + signature_re_level = og.signature_re_level; + ignore_checksums = og.ignore_checksums; + use_watchdog = og.use_watchdog; + pseudo_realtime = og.pseudo_realtime; + dns_mode = og.dns_mode; + + bare_mode = og.bare_mode; + perftools_check_leaks = og.perftools_check_leaks; + perftools_profile = og.perftools_profile; + + pcap_filter = og.pcap_filter; + signature_files = og.signature_files; + + // TODO: These are likely to be handled in a node-specific or + // use-case-specific way. e.g. interfaces is already handled for the + // "cluster" use-case, but don't have supervised-pcap-reading + // functionality yet. 
+ /* interfaces = og.interfaces; */ + /* pcap_files = og.pcap_files; */ + + pcap_output_file = og.pcap_output_file; + random_seed_input_file = og.random_seed_input_file; + random_seed_output_file = og.random_seed_output_file; + process_status_file = og.process_status_file; + + plugins_to_load = og.plugins_to_load; + scripts_to_load = og.scripts_to_load; + script_options_to_set = og.script_options_to_set; + } }; static std::vector to_cargs(const std::vector& args) @@ -828,28 +887,11 @@ int main(int argc, char** argv) return context.run(); } - auto use_supervisor = [&]() -> bool { return options.supervised_workers > 0; }; pid_t stem_pid = 0; std::unique_ptr supervisor_pipe; - - if ( use_supervisor() ) - { - // TODO: the SIGCHLD handler should be set before fork() - supervisor_pipe.reset(new bro::PipePair{FD_CLOEXEC, O_NONBLOCK}); - stem_pid = fork(); - - if ( stem_pid == -1 ) - { - fprintf(stderr, "failed to fork Zeek supervisor stem process: %s\n", - strerror(errno)); - exit(1); - } - - if ( stem_pid == 0 ) - zeek::supervised_node = zeek::Supervisor::RunStem(std::move(supervisor_pipe)); - } - auto zeek_stem_env = getenv("ZEEK_STEM"); + auto is_supervisor = [](const zeek_options& os) -> bool + { return os.supervised_workers > 0; }; if ( zeek_stem_env ) { @@ -871,13 +913,28 @@ int main(int argc, char** argv) supervisor_pipe.reset(new bro::PipePair{FD_CLOEXEC, O_NONBLOCK, fds}); zeek::supervised_node = zeek::Supervisor::RunStem(std::move(supervisor_pipe)); } + else if ( is_supervisor(options) ) + { + // TODO: the SIGCHLD handler should be set before fork() + supervisor_pipe.reset(new bro::PipePair{FD_CLOEXEC, O_NONBLOCK}); + stem_pid = fork(); + + if ( stem_pid == -1 ) + { + fprintf(stderr, "failed to fork Zeek supervisor stem process: %s\n", + strerror(errno)); + exit(1); + } + + if ( stem_pid == 0 ) + zeek::supervised_node = zeek::Supervisor::RunStem(std::move(supervisor_pipe)); + } if ( zeek::supervised_node ) { - // TODO: possibly can inherit some command-line 
options? - // In case stem gets revived via exec(), would need to pass along - // original arguments to it. - options = {}; + // TODO: probably all of this block could move to a new + // zeek::supervised_node->Init(options) method + options.filter_supervised_node_options(zeek::supervised_node); const auto& node_name = zeek::supervised_node->name; if ( zeek::supervised_node->interface ) @@ -967,7 +1024,7 @@ int main(int argc, char** argv) if ( ! getenv("ZEEK_DEBUG_LOG_STDERR") ) { - if ( use_supervisor() ) + if ( is_supervisor(options) ) debug_log_name = "debug-supervisor"; else debug_log_name = "debug"; @@ -977,19 +1034,16 @@ int main(int argc, char** argv) } #endif - if ( use_supervisor() ) + if ( is_supervisor(options) ) { zeek::Supervisor::Config cfg = {}; cfg.pcaps = options.pcap_files; cfg.num_workers = options.supervised_workers; cfg.zeek_exe_path = zeek_exe_path; + options.filter_supervisor_options(); zeek::supervisor = new zeek::Supervisor(std::move(cfg), std::move(supervisor_pipe), stem_pid); - - // TODO: what options actually apply to the supervisor ? - options.pcap_files = {}; - options.interfaces = {}; } const char* seed_load_file = zeekenv("ZEEK_SEED_FILE"); @@ -1044,7 +1098,7 @@ int main(int argc, char** argv) options.interfaces.size() == 0 && ! options.identifier_to_print && ! command_line_policy && ! options.print_plugins && - ! use_supervisor() && ! zeek::supervised_node ) + ! is_supervisor(options) && ! 
zeek::supervised_node ) add_input_file("-"); for ( const auto& script_option : options.script_options_to_set ) diff --git a/src/supervisor.bif b/src/supervisor.bif index c1d9b12b1c..f55e99f65c 100644 --- a/src/supervisor.bif +++ b/src/supervisor.bif @@ -80,3 +80,8 @@ function Supervisor::__is_supervised%(%): bool %{ return val_mgr->GetBool(zeek::supervised_node != nullptr); %} + +function Supervisor::__is_supervisor%(%): bool + %{ + return val_mgr->GetBool(zeek::supervisor != nullptr); + %} From 00cd04b0ae845ef673d5e8d3aca390770096c9c5 Mon Sep 17 00:00:00 2001 From: Jon Siwek Date: Wed, 8 Jan 2020 14:52:46 -0800 Subject: [PATCH 21/76] Extend Supervisor Node config with list of custom scripts --- scripts/base/frameworks/supervisor/api.zeek | 1 + src/Supervisor.cc | 20 ++++++++++++++++++++ src/Supervisor.h | 1 + src/main.cc | 3 +++ 4 files changed, 25 insertions(+) diff --git a/scripts/base/frameworks/supervisor/api.zeek b/scripts/base/frameworks/supervisor/api.zeek index 0426eb618f..ed330d5a3f 100644 --- a/scripts/base/frameworks/supervisor/api.zeek +++ b/scripts/base/frameworks/supervisor/api.zeek @@ -23,6 +23,7 @@ export { name: string; interface: string &optional; directory: string &optional; + scripts: vector of string &default = vector(); cluster: table[string] of ClusterEndpoint &default=table(); # TODO: separate node config fields from status fields ? 
diff --git a/src/Supervisor.cc b/src/Supervisor.cc index 005f631590..a719b83115 100644 --- a/src/Supervisor.cc +++ b/src/Supervisor.cc @@ -732,6 +732,14 @@ Supervisor::Node Supervisor::Node::FromRecord(const RecordVal* node) if ( directory_val ) rval.directory = directory_val->AsString()->CheckString(); + auto scripts_val = node->Lookup("scripts")->AsVectorVal(); + + for ( auto i = 0; i < scripts_val->Size(); ++i ) + { + auto script = scripts_val->Lookup(i)->AsStringVal()->ToStdString(); + rval.scripts.emplace_back(std::move(script)); + } + auto cluster_table_val = node->Lookup("cluster")->AsTableVal(); auto cluster_table = cluster_table_val->AsTable(); auto c = cluster_table->InitForIteration(); @@ -773,6 +781,11 @@ Supervisor::Node Supervisor::Node::FromJSON(std::string_view json) if ( auto it = j.find("directory"); it != j.end() ) rval.directory= *it; + auto scripts = j["scripts"]; + + for ( auto& s : scripts ) + rval.scripts.emplace_back(std::move(s)); + auto cluster = j["cluster"]; for ( const auto& e : cluster.items() ) @@ -820,6 +833,13 @@ IntrusivePtr Supervisor::Node::ToRecord() const if ( directory ) rval->Assign(rt->FieldOffset("directory"), new StringVal(*directory)); + auto st = BifType::Record::Supervisor::Node->FieldType("scripts"); + auto scripts_val = new VectorVal(st->AsVectorType()); + rval->Assign(rt->FieldOffset("scripts"), scripts_val); + + for ( const auto& s : scripts ) + scripts_val->Assign(scripts_val->Size(), new StringVal(s)); + auto tt = BifType::Record::Supervisor::Node->FieldType("cluster"); auto cluster_val = new TableVal(tt->AsTableType()); rval->Assign(rt->FieldOffset("cluster"), cluster_val); diff --git a/src/Supervisor.h b/src/Supervisor.h index 994a65a381..70cc89e3a6 100644 --- a/src/Supervisor.h +++ b/src/Supervisor.h @@ -47,6 +47,7 @@ public: std::string name; std::optional interface; std::optional directory; + std::vector scripts; std::map cluster; pid_t pid = 0; diff --git a/src/main.cc b/src/main.cc index 
498a0b428a..f5fcb2a07e 100644 --- a/src/main.cc +++ b/src/main.cc @@ -961,6 +961,9 @@ int main(int argc, char** argv) exit(1); } } + + for ( const auto& s : zeek::supervised_node->scripts ) + options.scripts_to_load.emplace_back(s); } double time_start = current_time(true); From 263a5f404a7f083ca7221c7c87d940bb60932d76 Mon Sep 17 00:00:00 2001 From: Jon Siwek Date: Fri, 10 Jan 2020 18:25:42 -0800 Subject: [PATCH 22/76] Add cpu affinity option to supervised node config --- scripts/base/frameworks/supervisor/api.zeek | 1 + src/CMakeLists.txt | 1 + src/Supervisor.cc | 13 +++++- src/Supervisor.h | 1 + src/main.cc | 15 +++++- src/zeek-affinity.cc | 51 +++++++++++++++++++++ src/zeek-affinity.h | 15 ++++++ 7 files changed, 94 insertions(+), 3 deletions(-) create mode 100644 src/zeek-affinity.cc create mode 100644 src/zeek-affinity.h diff --git a/scripts/base/frameworks/supervisor/api.zeek b/scripts/base/frameworks/supervisor/api.zeek index ed330d5a3f..d335a04543 100644 --- a/scripts/base/frameworks/supervisor/api.zeek +++ b/scripts/base/frameworks/supervisor/api.zeek @@ -24,6 +24,7 @@ export { interface: string &optional; directory: string &optional; scripts: vector of string &default = vector(); + cpu_affinity: int &optional; cluster: table[string] of ClusterEndpoint &default=table(); # TODO: separate node config fields from status fields ? 
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 24b991d88d..c82580387f 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -204,6 +204,7 @@ set(MAIN_SRCS net_util.cc util.cc module_util.cc + zeek-affinity.cc Anon.cc Attr.cc Base64.cc diff --git a/src/Supervisor.cc b/src/Supervisor.cc index a719b83115..1aef520686 100644 --- a/src/Supervisor.cc +++ b/src/Supervisor.cc @@ -732,6 +732,11 @@ Supervisor::Node Supervisor::Node::FromRecord(const RecordVal* node) if ( directory_val ) rval.directory = directory_val->AsString()->CheckString(); + auto affinity_val = node->Lookup("cpu_affinity"); + + if ( affinity_val ) + rval.cpu_affinity = affinity_val->AsInt(); + auto scripts_val = node->Lookup("scripts")->AsVectorVal(); for ( auto i = 0; i < scripts_val->Size(); ++i ) @@ -779,7 +784,10 @@ Supervisor::Node Supervisor::Node::FromJSON(std::string_view json) rval.interface = *it; if ( auto it = j.find("directory"); it != j.end() ) - rval.directory= *it; + rval.directory = *it; + + if ( auto it = j.find("cpu_affinity"); it != j.end() ) + rval.cpu_affinity = *it; auto scripts = j["scripts"]; @@ -833,6 +841,9 @@ IntrusivePtr Supervisor::Node::ToRecord() const if ( directory ) rval->Assign(rt->FieldOffset("directory"), new StringVal(*directory)); + if ( cpu_affinity ) + rval->Assign(rt->FieldOffset("cpu_affinity"), val_mgr->GetInt(*cpu_affinity)); + auto st = BifType::Record::Supervisor::Node->FieldType("scripts"); auto scripts_val = new VectorVal(st->AsVectorType()); rval->Assign(rt->FieldOffset("scripts"), scripts_val); diff --git a/src/Supervisor.h b/src/Supervisor.h index 70cc89e3a6..623afc36a0 100644 --- a/src/Supervisor.h +++ b/src/Supervisor.h @@ -47,6 +47,7 @@ public: std::string name; std::optional interface; std::optional directory; + std::optional cpu_affinity; std::vector scripts; std::map cluster; diff --git a/src/main.cc b/src/main.cc index f5fcb2a07e..021c5207aa 100644 --- a/src/main.cc +++ b/src/main.cc @@ -78,6 +78,8 @@ extern "C" { #include 
"setsignal.h" }; +#include "zeek-affinity.h" + #ifdef USE_PERFTOOLS_DEBUG HeapLeakChecker* heap_checker = 0; int perftools_leaks = 0; @@ -944,7 +946,7 @@ int main(int argc, char** argv) { if ( setenv("CLUSTER_NODE", node_name.data(), true) == -1 ) { - fprintf(stderr, "cluster node %s failed to setenv: %s\n", + fprintf(stderr, "node '%s' failed to setenv: %s\n", node_name.data(), strerror(errno)); exit(1); } @@ -954,7 +956,7 @@ int main(int argc, char** argv) { if ( chdir(zeek::supervised_node->directory->data()) ) { - fprintf(stderr, "supervised node %s failed to chdir to %s: %s\n", + fprintf(stderr, "node '%s' failed to chdir to %s: %s\n", node_name.data(), zeek::supervised_node->directory->data(), strerror(errno)); @@ -962,6 +964,15 @@ int main(int argc, char** argv) } } + if ( zeek::supervised_node->cpu_affinity ) + { + auto res = zeek::set_affinity(*zeek::supervised_node->cpu_affinity); + + if ( ! res ) + fprintf(stderr, "node '%s' failed to set CPU affinity: %s\n", + node_name.data(), strerror(errno)); + } + for ( const auto& s : zeek::supervised_node->scripts ) options.scripts_to_load.emplace_back(s); } diff --git a/src/zeek-affinity.cc b/src/zeek-affinity.cc new file mode 100644 index 0000000000..7fef4c203a --- /dev/null +++ b/src/zeek-affinity.cc @@ -0,0 +1,51 @@ +// See the file "COPYING" in the main distribution directory for copyright. 
+ +#if defined(__linux__) + +#if !defined(_GNU_SOURCE) +#define _GNU_SOURCE +#endif + +#include + +namespace zeek { +bool set_affinity(int core_number) + { + cpu_set_t cpus; + CPU_ZERO(&cpus); + CPU_SET(core_number, &cpus); + auto res = sched_setaffinity(0, sizeof(cpus), &cpus); + return res == 0; + } +} // namespace zeek + +#elif defined(__FreeBSD__) + +#include +#include + +namespace zeek { +bool set_affinity(int core_number) + { + cpuset_t cpus; + CPU_ZERO(&cpus); + CPU_SET(core_number, &cpus); + auto res = cpuset_setaffinity(CPU_LEVEL_WHICH, CPU_WHICH_PID, -1, + sizeof(cpus), &cpus); + return res == 0; + } +} // namespace zeek + +#else + +#include + +namespace zeek { +bool set_affinity(int core_number) + { + errno = ENOTSUP; + return false; + } +} // namespace zeek + +#endif diff --git a/src/zeek-affinity.h b/src/zeek-affinity.h new file mode 100644 index 0000000000..ce4bdd9c49 --- /dev/null +++ b/src/zeek-affinity.h @@ -0,0 +1,15 @@ +// See the file "COPYING" in the main distribution directory for copyright. + +namespace zeek { + +/** + * Set the process affinity to a given CPU. Currently only supported on + * Linux and FreeBSD. + * @param core_number the core to which this process should set its affinity. + * Cores are typically numbered 0..N. + * @return true if the affinity is successfully set and false if not with + * errno additionally being set to indicate the reason. 
+ */ +bool set_affinity(int core_number); + +} // namespace zeek From 5191e14eff0d07753a3034acbbb09e780959abd9 Mon Sep 17 00:00:00 2001 From: Jon Siwek Date: Fri, 10 Jan 2020 19:48:31 -0800 Subject: [PATCH 23/76] Add stdout/stderr redirection option to supervised node config --- scripts/base/frameworks/supervisor/api.zeek | 2 + src/Supervisor.cc | 22 ++++++++ src/Supervisor.h | 2 + src/main.cc | 61 ++++++++++++++++----- 4 files changed, 73 insertions(+), 14 deletions(-) diff --git a/scripts/base/frameworks/supervisor/api.zeek b/scripts/base/frameworks/supervisor/api.zeek index d335a04543..09a80d6cb5 100644 --- a/scripts/base/frameworks/supervisor/api.zeek +++ b/scripts/base/frameworks/supervisor/api.zeek @@ -23,6 +23,8 @@ export { name: string; interface: string &optional; directory: string &optional; + stdout_file: string &optional; + stderr_file: string &optional; scripts: vector of string &default = vector(); cpu_affinity: int &optional; cluster: table[string] of ClusterEndpoint &default=table(); diff --git a/src/Supervisor.cc b/src/Supervisor.cc index 1aef520686..ff26ca1f28 100644 --- a/src/Supervisor.cc +++ b/src/Supervisor.cc @@ -732,6 +732,16 @@ Supervisor::Node Supervisor::Node::FromRecord(const RecordVal* node) if ( directory_val ) rval.directory = directory_val->AsString()->CheckString(); + auto stdout_val = node->Lookup("stdout_file"); + + if ( stdout_val ) + rval.stdout_file = stdout_val->AsString()->CheckString(); + + auto stderr_val = node->Lookup("stderr_file"); + + if ( stderr_val ) + rval.stderr_file = stderr_val->AsString()->CheckString(); + auto affinity_val = node->Lookup("cpu_affinity"); if ( affinity_val ) @@ -786,6 +796,12 @@ Supervisor::Node Supervisor::Node::FromJSON(std::string_view json) if ( auto it = j.find("directory"); it != j.end() ) rval.directory = *it; + if ( auto it = j.find("stdout_file"); it != j.end() ) + rval.stdout_file= *it; + + if ( auto it = j.find("stderr_file"); it != j.end() ) + rval.stderr_file= *it; + if ( auto it = 
j.find("cpu_affinity"); it != j.end() ) rval.cpu_affinity = *it; @@ -841,6 +857,12 @@ IntrusivePtr Supervisor::Node::ToRecord() const if ( directory ) rval->Assign(rt->FieldOffset("directory"), new StringVal(*directory)); + if ( stdout_file ) + rval->Assign(rt->FieldOffset("stdout_file"), new StringVal(*stdout_file)); + + if ( stderr_file ) + rval->Assign(rt->FieldOffset("stderr_file"), new StringVal(*stderr_file)); + if ( cpu_affinity ) rval->Assign(rt->FieldOffset("cpu_affinity"), val_mgr->GetInt(*cpu_affinity)); diff --git a/src/Supervisor.h b/src/Supervisor.h index 623afc36a0..1eeab43998 100644 --- a/src/Supervisor.h +++ b/src/Supervisor.h @@ -47,6 +47,8 @@ public: std::string name; std::optional interface; std::optional directory; + std::optional stdout_file; + std::optional stderr_file; std::optional cpu_affinity; std::vector scripts; std::map cluster; diff --git a/src/main.cc b/src/main.cc index 021c5207aa..81c138ee11 100644 --- a/src/main.cc +++ b/src/main.cc @@ -936,22 +936,8 @@ int main(int argc, char** argv) { // TODO: probably all of this block could move to a new // zeek::supervised_node->Init(options) method - options.filter_supervised_node_options(zeek::supervised_node); const auto& node_name = zeek::supervised_node->name; - if ( zeek::supervised_node->interface ) - options.interfaces.emplace_back(*zeek::supervised_node->interface); - - if ( ! 
zeek::supervised_node->cluster.empty() ) - { - if ( setenv("CLUSTER_NODE", node_name.data(), true) == -1 ) - { - fprintf(stderr, "node '%s' failed to setenv: %s\n", - node_name.data(), strerror(errno)); - exit(1); - } - } - if ( zeek::supervised_node->directory ) { if ( chdir(zeek::supervised_node->directory->data()) ) @@ -964,6 +950,38 @@ int main(int argc, char** argv) } } + if ( zeek::supervised_node->stderr_file ) + { + auto fd = open(zeek::supervised_node->stderr_file->data(), + O_WRONLY | O_CREAT | O_TRUNC | O_APPEND | O_CLOEXEC, + 0600); + + if ( fd == -1 || dup2(fd, STDERR_FILENO) == -1 ) + { + fprintf(stderr, "node '%s' failed to create stderr file %s: %s\n", + node_name.data(), + zeek::supervised_node->stderr_file->data(), + strerror(errno)); + exit(1); + } + } + + if ( zeek::supervised_node->stdout_file ) + { + auto fd = open(zeek::supervised_node->stdout_file->data(), + O_WRONLY | O_CREAT | O_TRUNC | O_APPEND | O_CLOEXEC, + 0600); + + if ( fd == -1 || dup2(fd, STDOUT_FILENO) == -1 ) + { + fprintf(stderr, "node '%s' failed to create stdout file %s: %s\n", + node_name.data(), + zeek::supervised_node->stdout_file->data(), + strerror(errno)); + exit(1); + } + } + if ( zeek::supervised_node->cpu_affinity ) { auto res = zeek::set_affinity(*zeek::supervised_node->cpu_affinity); @@ -973,6 +991,21 @@ int main(int argc, char** argv) node_name.data(), strerror(errno)); } + options.filter_supervised_node_options(zeek::supervised_node); + + if ( zeek::supervised_node->interface ) + options.interfaces.emplace_back(*zeek::supervised_node->interface); + + if ( ! 
zeek::supervised_node->cluster.empty() ) + { + if ( setenv("CLUSTER_NODE", node_name.data(), true) == -1 ) + { + fprintf(stderr, "node '%s' failed to setenv: %s\n", + node_name.data(), strerror(errno)); + exit(1); + } + } + for ( const auto& s : zeek::supervised_node->scripts ) options.scripts_to_load.emplace_back(s); } From 0ff99c3df8a369f9d06910a22281a601c7da7978 Mon Sep 17 00:00:00 2001 From: Jon Siwek Date: Mon, 13 Jan 2020 20:09:05 -0800 Subject: [PATCH 24/76] Separate supervisor node config and status data structures --- scripts/base/frameworks/supervisor/api.zeek | 15 ++-- scripts/base/frameworks/supervisor/main.zeek | 4 +- src/Supervisor.cc | 93 +++++++++++--------- src/Supervisor.h | 31 +++++-- src/main.cc | 7 +- src/supervisor.bif | 7 +- 6 files changed, 93 insertions(+), 64 deletions(-) diff --git a/scripts/base/frameworks/supervisor/api.zeek b/scripts/base/frameworks/supervisor/api.zeek index 09a80d6cb5..d11d8a2da5 100644 --- a/scripts/base/frameworks/supervisor/api.zeek +++ b/scripts/base/frameworks/supervisor/api.zeek @@ -19,7 +19,7 @@ export { interface: string &optional; }; - type Node: record { + type NodeConfig: record { name: string; interface: string &optional; directory: string &optional; @@ -28,18 +28,19 @@ export { scripts: vector of string &default = vector(); cpu_affinity: int &optional; cluster: table[string] of ClusterEndpoint &default=table(); + }; - # TODO: separate node config fields from status fields ? - # TODO: add more status fields ? 
- pid: count &optional; + type NodeStatus: record { + node: NodeConfig; + pid: count; }; type Status: record { - nodes: table[string] of Node; + nodes: table[string] of NodeStatus; }; global status: function(nodes: string &default="all"): Status; - global create: function(node: Node): string; + global create: function(node: NodeConfig): string; global destroy: function(nodes: string): bool; global restart: function(nodes: string &default="all"): bool; @@ -51,7 +52,7 @@ export { global Supervisor::status_request: event(reqid: string, nodes: string); global Supervisor::status_response: event(reqid: string, result: Status); - global Supervisor::create_request: event(reqid: string, node: Node); + global Supervisor::create_request: event(reqid: string, node: NodeConfig); global Supervisor::create_response: event(reqid: string, result: string); global Supervisor::destroy_request: event(reqid: string, nodes: string); diff --git a/scripts/base/frameworks/supervisor/main.zeek b/scripts/base/frameworks/supervisor/main.zeek index b98fd59847..0dc93a64bc 100644 --- a/scripts/base/frameworks/supervisor/main.zeek +++ b/scripts/base/frameworks/supervisor/main.zeek @@ -28,7 +28,7 @@ event Supervisor::status_request(reqid: string, nodes: string) Broker::publish(topic, Supervisor::status_response, reqid, res); } -event Supervisor::create_request(reqid: string, node: Node) +event Supervisor::create_request(reqid: string, node: NodeConfig) { local res = Supervisor::create(node); local topic = Supervisor::topic_prefix + fmt("/create_response/%s", reqid); @@ -54,7 +54,7 @@ function Supervisor::status(nodes: string): Status return Supervisor::__status(nodes); } -function Supervisor::create(node: Node): string +function Supervisor::create(node: NodeConfig): string { return Supervisor::__create(node); } diff --git a/src/Supervisor.cc b/src/Supervisor.cc index ff26ca1f28..0ce4329861 100644 --- a/src/Supervisor.cc +++ b/src/Supervisor.cc @@ -30,11 +30,11 @@ struct Stem { ~Stem(); - 
Supervisor::Node* Run(); + std::optional Run(); - Supervisor::Node* Poll(); + std::optional Poll(); - Supervisor::Node* Revive(); + std::optional Revive(); void Reap(); @@ -108,7 +108,7 @@ static std::vector extract_messages(std::string* buffer) return rval; } -static std::string make_create_message(const Supervisor::Node& node) +static std::string make_create_message(const Supervisor::NodeConfig& node) { auto json_str = node.ToJSON(); return fmt("create %s %s", node.name.data(), json_str.data()); @@ -274,13 +274,13 @@ void Supervisor::HandleChildSignal() for ( const auto& n : nodes ) { const auto& node = n.second; - auto msg = make_create_message(node); + auto msg = make_create_message(node.config); safe_write(stem_pipe->OutFD(), msg.data(), msg.size() + 1); } } void Supervisor::GetFds(iosource::FD_Set* read, iosource::FD_Set* write, - iosource::FD_Set* except) + iosource::FD_Set* except) { read->Insert(signal_flare.FD()); read->Insert(stem_pipe->InFD()); @@ -378,7 +378,7 @@ bool Stem::Wait(Supervisor::Node* node, int options) const if ( res == -1 ) { fprintf(stderr, "Stem failed to get node exit status %s (%d): %s\n", - node->name.data(), node->pid, strerror(errno)); + node->Name().data(), node->pid, strerror(errno)); return false; } @@ -387,17 +387,17 @@ bool Stem::Wait(Supervisor::Node* node, int options) const node->exit_status = WEXITSTATUS(status); // TODO: may be some cases where the node is intended to exit printf("node '%s' exited with status %d\n", - node->name.data(), node->exit_status); + node->Name().data(), node->exit_status); } else if ( WIFSIGNALED(status) ) { node->signal_number = WTERMSIG(status); printf("node '%s' terminated by signal %d\n", - node->name.data(), node->signal_number); + node->Name().data(), node->signal_number); } else fprintf(stderr, "Stem failed to get node exit status %s (%d)\n", - node->name.data(), node->pid); + node->Name().data(), node->pid); node->pid = 0; return true; @@ -409,7 +409,7 @@ void Stem::KillNode(const 
Supervisor::Node& node, int signal) const if ( kill_res == -1 ) fprintf(stderr, "Failed to send signal to node %s: %s", - node.name.data(), strerror(errno)); + node.Name().data(), strerror(errno)); } void Stem::Destroy(Supervisor::Node* node) const @@ -428,12 +428,12 @@ void Stem::Destroy(Supervisor::Node* node) const break; printf("Stem waiting to destroy node: %s (%d)\n", - node->name.data(), node->pid); + node->Name().data(), node->pid); sleep(kill_delay); } } -Supervisor::Node* Stem::Revive() +std::optional Stem::Revive() { constexpr auto attempts_before_delay_increase = 3; constexpr auto delay_increase_factor = 2; @@ -468,7 +468,7 @@ Supervisor::Node* Stem::Revive() node.revival_delay *= delay_increase_factor; if ( Spawn(&node) ) - return new Supervisor::Node(node); + return node.config; ReportStatus(node); } @@ -483,19 +483,19 @@ bool Stem::Spawn(Supervisor::Node* node) if ( node_pid == -1 ) { fprintf(stderr, "failed to fork Zeek node '%s': %s\n", - node->name.data(), strerror(errno)); + node->Name().data(), strerror(errno)); return false; } if ( node_pid == 0 ) { - zeek::set_thread_name(fmt("zeek.%s", node->name.data())); + zeek::set_thread_name(fmt("zeek.%s", node->Name().data())); return true; } node->pid = node_pid; node->spawn_time = std::chrono::steady_clock::now(); - printf("Stem spawned node: %s (%d)\n", node->name.data(), node->pid); + printf("Stem spawned node: %s (%d)\n", node->Name().data(), node->pid); return false; } @@ -563,11 +563,11 @@ void Stem::Shutdown(int exit_code) void Stem::ReportStatus(const Supervisor::Node& node) const { - std::string msg = fmt("status %s %d", node.name.data(), node.pid); + std::string msg = fmt("status %s %d", node.Name().data(), node.pid); safe_write(pipe->OutFD(), msg.data(), msg.size() + 1); } -Supervisor::Node* Stem::Run() +std::optional Stem::Run() { for ( ; ; ) { @@ -580,7 +580,7 @@ Supervisor::Node* Stem::Run() return {}; } -Supervisor::Node* Stem::Poll() +std::optional Stem::Poll() { pollfd fds[2] = { { 
pipe->InFD(), POLLIN, 0 }, { signal_flare->FD(), POLLIN, 0} }; @@ -659,15 +659,16 @@ Supervisor::Node* Stem::Poll() { const auto& node_json = msg_tokens[2]; assert(nodes.find(node_name) == nodes.end()); - auto node = Supervisor::Node::FromJSON(node_json); + auto node_config = Supervisor::NodeConfig::FromJSON(node_json); + auto it = nodes.emplace(node_name, std::move(node_config)).first; + auto& node = it->second; if ( Spawn(&node) ) - return new Supervisor::Node(node); + return node.config; // TODO: get stem printfs going through standard Zeek debug.log - printf("Stem created node: %s (%d)\n", node.name.data(), node.pid); - auto it = nodes.emplace(node_name, std::move(node)).first; - ReportStatus(it->second); + printf("Stem created node: %s (%d)\n", node.Name().data(), node.pid); + ReportStatus(node); } else if ( cmd == "destroy" ) { @@ -687,7 +688,7 @@ Supervisor::Node* Stem::Poll() Destroy(&node); if ( Spawn(&node) ) - return new Supervisor::Node(node); + return node.config; ReportStatus(node); } @@ -698,7 +699,7 @@ Supervisor::Node* Stem::Poll() return {}; } -Supervisor::Node* Supervisor::RunStem(std::unique_ptr pipe) +std::optional Supervisor::RunStem(std::unique_ptr pipe) { Stem s(std::move(pipe)); return s.Run(); @@ -718,9 +719,9 @@ static BifEnum::Supervisor::ClusterRole role_str_to_enum(const std::string& r) return BifEnum::Supervisor::NONE; } -Supervisor::Node Supervisor::Node::FromRecord(const RecordVal* node) +Supervisor::NodeConfig Supervisor::NodeConfig::FromRecord(const RecordVal* node) { - Supervisor::Node rval; + Supervisor::NodeConfig rval; rval.name = node->Lookup("name")->AsString()->CheckString(); auto iface_val = node->Lookup("interface"); @@ -784,9 +785,9 @@ Supervisor::Node Supervisor::Node::FromRecord(const RecordVal* node) return rval; } -Supervisor::Node Supervisor::Node::FromJSON(std::string_view json) +Supervisor::NodeConfig Supervisor::NodeConfig::FromJSON(std::string_view json) { - Supervisor::Node rval; + Supervisor::NodeConfig rval; 
auto j = nlohmann::json::parse(json); rval.name = j["name"]; @@ -836,7 +837,7 @@ Supervisor::Node Supervisor::Node::FromJSON(std::string_view json) return rval; } -std::string Supervisor::Node::ToJSON() const +std::string Supervisor::NodeConfig::ToJSON() const { auto re = std::make_unique("^_"); auto node_val = ToRecord(); @@ -845,9 +846,9 @@ std::string Supervisor::Node::ToJSON() const return rval; } -IntrusivePtr Supervisor::Node::ToRecord() const +IntrusivePtr Supervisor::NodeConfig::ToRecord() const { - auto rt = BifType::Record::Supervisor::Node; + auto rt = BifType::Record::Supervisor::NodeConfig; auto rval = make_intrusive(rt); rval->Assign(rt->FieldOffset("name"), new StringVal(name)); @@ -866,14 +867,14 @@ IntrusivePtr Supervisor::Node::ToRecord() const if ( cpu_affinity ) rval->Assign(rt->FieldOffset("cpu_affinity"), val_mgr->GetInt(*cpu_affinity)); - auto st = BifType::Record::Supervisor::Node->FieldType("scripts"); + auto st = BifType::Record::Supervisor::NodeConfig->FieldType("scripts"); auto scripts_val = new VectorVal(st->AsVectorType()); rval->Assign(rt->FieldOffset("scripts"), scripts_val); for ( const auto& s : scripts ) scripts_val->Assign(scripts_val->Size(), new StringVal(s)); - auto tt = BifType::Record::Supervisor::Node->FieldType("cluster"); + auto tt = BifType::Record::Supervisor::NodeConfig->FieldType("cluster"); auto cluster_val = new TableVal(tt->AsTableType()); rval->Assign(rt->FieldOffset("cluster"), cluster_val); @@ -895,12 +896,23 @@ IntrusivePtr Supervisor::Node::ToRecord() const cluster_val->Assign(key.get(), val.detach()); } + return rval; + } + +IntrusivePtr Supervisor::Node::ToRecord() const + { + auto rt = BifType::Record::Supervisor::NodeStatus; + auto rval = make_intrusive(rt); + + rval->Assign(rt->FieldOffset("node"), config.ToRecord().detach()); + if ( pid ) rval->Assign(rt->FieldOffset("pid"), val_mgr->GetCount(pid)); return rval; } + static Val* supervisor_role_to_cluster_node_type(BifEnum::Supervisor::ClusterRole role) { 
static auto node_type = global_scope()->Lookup("Cluster::NodeType")->AsType()->AsEnumType(); @@ -919,7 +931,7 @@ static Val* supervisor_role_to_cluster_node_type(BifEnum::Supervisor::ClusterRol } } -void Supervisor::Node::InitCluster() +void Supervisor::NodeConfig::InitCluster() { auto cluster_node_type = global_scope()->Lookup("Cluster::Node")->AsType()->AsRecordType(); auto cluster_nodes_id = global_scope()->Lookup("Cluster::nodes"); @@ -972,8 +984,9 @@ RecordVal* Supervisor::Status(std::string_view node_name) for ( const auto& n : nodes ) { + const auto& name = n.first; const auto& node = n.second; - auto key = make_intrusive(node.name); + auto key = make_intrusive(name); auto val = node.ToRecord(); node_table_val->Assign(key.get(), val.detach()); } @@ -983,11 +996,11 @@ RecordVal* Supervisor::Status(std::string_view node_name) std::string Supervisor::Create(const RecordVal* node_val) { - auto node = Supervisor::Node::FromRecord(node_val); + auto node = Supervisor::NodeConfig::FromRecord(node_val); return Create(node); } -std::string Supervisor::Create(const Supervisor::Node& node) +std::string Supervisor::Create(const Supervisor::NodeConfig& node) { if ( node.name.find(' ') != std::string::npos ) return fmt("node names must not contain spaces: '%s'", diff --git a/src/Supervisor.h b/src/Supervisor.h index 1eeab43998..8759c9004f 100644 --- a/src/Supervisor.h +++ b/src/Supervisor.h @@ -35,11 +35,10 @@ public: std::optional interface; }; - struct Node { - static Node FromRecord(const RecordVal* node_val); - static Node FromJSON(std::string_view json); - + struct NodeConfig { static void InitCluster(); + static NodeConfig FromRecord(const RecordVal* node_val); + static NodeConfig FromJSON(std::string_view json); std::string ToJSON() const; IntrusivePtr ToRecord() const; @@ -52,7 +51,18 @@ public: std::optional cpu_affinity; std::vector scripts; std::map cluster; + }; + struct Node { + IntrusivePtr ToRecord() const; + + const std::string& Name() const + { return 
config.name; } + + Node(NodeConfig arg_config) : config(std::move(arg_config)) + { } + + NodeConfig config; pid_t pid = 0; int exit_status = 0; int signal_number = 0; @@ -61,7 +71,9 @@ public: std::chrono::time_point spawn_time; }; - static Node* RunStem(std::unique_ptr pipe); + static std::optional RunStem(std::unique_ptr pipe); + + using NodeMap = std::map>; Supervisor(Config cfg, std::unique_ptr stem_pipe, pid_t stem_pid); @@ -74,10 +86,13 @@ public: RecordVal* Status(std::string_view node_name); std::string Create(const RecordVal* node); - std::string Create(const Supervisor::Node& node); + std::string Create(const Supervisor::NodeConfig& node); bool Destroy(std::string_view node_name); bool Restart(std::string_view node_name); + const NodeMap& Nodes() + { return nodes; } + private: // IOSource interface overrides: @@ -99,11 +114,11 @@ private: pid_t stem_pid; std::unique_ptr stem_pipe; bro::Flare signal_flare; - std::map> nodes; + NodeMap nodes; std::string msg_buffer; }; extern Supervisor* supervisor; -extern Supervisor::Node* supervised_node; +extern std::optional supervised_node; } // namespace zeek diff --git a/src/main.cc b/src/main.cc index 9b7a15ca8d..b6bd67982a 100644 --- a/src/main.cc +++ b/src/main.cc @@ -100,7 +100,7 @@ zeekygen::Manager* zeekygen_mgr = 0; iosource::Manager* iosource_mgr = 0; bro_broker::Manager* broker_mgr = 0; zeek::Supervisor* zeek::supervisor = 0; -zeek::Supervisor::Node* zeek::supervised_node = 0; +std::optional zeek::supervised_node; std::vector zeek_script_prefixes; Stmt* stmts; @@ -286,7 +286,7 @@ struct zeek_options { * and discard the rest. * @param node the supervised-node whose Zeek options are to be modified. 
*/ - void filter_supervised_node_options(zeek::Supervisor::Node* node) + void filter_supervised_node_options(const zeek::Supervisor::NodeConfig& node) { auto og = *this; *this = {}; @@ -752,7 +752,6 @@ void terminate_bro() delete file_mgr; // broker_mgr is deleted via iosource_mgr // supervisor is deleted via iosource_mgr - delete zeek::supervised_node; delete iosource_mgr; delete log_mgr; delete reporter; @@ -992,7 +991,7 @@ int main(int argc, char** argv) node_name.data(), strerror(errno)); } - options.filter_supervised_node_options(zeek::supervised_node); + options.filter_supervised_node_options(*zeek::supervised_node); if ( zeek::supervised_node->interface ) options.interfaces.emplace_back(*zeek::supervised_node->interface); diff --git a/src/supervisor.bif b/src/supervisor.bif index f55e99f65c..41f99c6e73 100644 --- a/src/supervisor.bif +++ b/src/supervisor.bif @@ -16,7 +16,8 @@ enum ClusterRole %{ type Supervisor::ClusterEndpoint: record; type Supervisor::Status: record; -type Supervisor::Node: record; +type Supervisor::NodeConfig: record; +type Supervisor::NodeStatus: record; function Supervisor::__status%(nodes: string%): Supervisor::Status %{ @@ -29,7 +30,7 @@ function Supervisor::__status%(nodes: string%): Supervisor::Status return zeek::supervisor->Status(nodes->CheckString()); %} -function Supervisor::__create%(node: Supervisor::Node%): string +function Supervisor::__create%(node: Supervisor::NodeConfig%): string %{ if ( ! 
zeek::supervisor ) { @@ -78,7 +79,7 @@ function Supervisor::__init_cluster%(%): bool function Supervisor::__is_supervised%(%): bool %{ - return val_mgr->GetBool(zeek::supervised_node != nullptr); + return val_mgr->GetBool(zeek::supervised_node.has_value()); %} function Supervisor::__is_supervisor%(%): bool From 80b3aef486d44c341a10d15aae84bac1ac8890ce Mon Sep 17 00:00:00 2001 From: Jon Siwek Date: Tue, 14 Jan 2020 11:24:46 -0800 Subject: [PATCH 25/76] Improve supervisor debug logging Mainly making stem process debug messages sent up to parent supervisor process and included in its debug.log though option to print to stderr remains in form of environment variable in case debugging breaking change to the IPC mechanism itself. --- src/Supervisor.cc | 119 ++++++++++++++++++++++++++++++++++------------ src/Supervisor.h | 2 + src/main.cc | 14 ++---- 3 files changed, 95 insertions(+), 40 deletions(-) diff --git a/src/Supervisor.cc b/src/Supervisor.cc index 0ce4329861..4d9f163ccb 100644 --- a/src/Supervisor.cc +++ b/src/Supervisor.cc @@ -6,6 +6,7 @@ #include #include +#include #include #include "Supervisor.h" @@ -22,6 +23,12 @@ extern "C" { #include "setsignal.h" } +#ifdef DEBUG +#define DBG_STEM(args...) stem->LogDebug(args); +#else +#define DBG_STEM +#endif + using namespace zeek; namespace { @@ -54,6 +61,10 @@ struct Stem { void ReportStatus(const Supervisor::Node& node) const; + void LogDebug(const char* format, ...) const __attribute__((format(printf, 2, 3))); + + void LogError(const char* format, ...) 
const __attribute__((format(printf, 2, 3))); + std::unique_ptr signal_flare; std::unique_ptr pipe; std::map nodes; @@ -67,7 +78,7 @@ static Stem* stem = nullptr; static RETSIGTYPE stem_sig_handler(int signo) { // TODO: signal safety - printf("Stem received signal: %d\n", signo); + DBG_STEM("Stem received signal: %d", signo); if ( stem->shutting_down ) return RETSIGVAL; @@ -158,6 +169,8 @@ Supervisor::~Supervisor() reporter->Error("Failed to wait for stem process to exit: %s", tmp); } } + + while ( ProcessMessages() != 0 ); } void Supervisor::ObserveChildSignal() @@ -294,7 +307,11 @@ double Supervisor::NextTimestamp(double* local_network_time) void Supervisor::Process() { HandleChildSignal(); + ProcessMessages(); + } +size_t Supervisor::ProcessMessages() + { char buf[256]; int bytes_read = read(stem_pipe->InFD(), buf, 256); @@ -318,9 +335,15 @@ void Supervisor::Process() if ( it != nodes.end() ) it->second.pid = std::stoi(msg_tokens[2]); } + else if ( type == "debug" ) + { + // Already logged the unparsed message above. 
+ } else reporter->Error("Supervisor got unknown msg: %s", msg.data()); } + + return msgs.size(); } Stem::Stem(std::unique_ptr p) @@ -343,8 +366,7 @@ Stem::Stem(std::unique_ptr p) auto res = setpgid(0, 0); if ( res == -1 ) - fprintf(stderr, "failed to set stem process group: %s\n", - strerror(errno)); + LogError("failed to set stem process group: %s", strerror(errno)); } Stem::~Stem() @@ -377,8 +399,8 @@ bool Stem::Wait(Supervisor::Node* node, int options) const if ( res == -1 ) { - fprintf(stderr, "Stem failed to get node exit status %s (%d): %s\n", - node->Name().data(), node->pid, strerror(errno)); + LogError("Stem failed to get node exit status %s (%d): %s", + node->Name().data(), node->pid, strerror(errno)); return false; } @@ -386,18 +408,18 @@ bool Stem::Wait(Supervisor::Node* node, int options) const { node->exit_status = WEXITSTATUS(status); // TODO: may be some cases where the node is intended to exit - printf("node '%s' exited with status %d\n", - node->Name().data(), node->exit_status); + DBG_STEM("node '%s' exited with status %d", + node->Name().data(), node->exit_status); } else if ( WIFSIGNALED(status) ) { node->signal_number = WTERMSIG(status); - printf("node '%s' terminated by signal %d\n", - node->Name().data(), node->signal_number); + DBG_STEM("node '%s' terminated by signal %d", + node->Name().data(), node->signal_number); } else - fprintf(stderr, "Stem failed to get node exit status %s (%d)\n", - node->Name().data(), node->pid); + LogError("Stem failed to get node exit status %s (%d)", + node->Name().data(), node->pid); node->pid = 0; return true; @@ -408,8 +430,8 @@ void Stem::KillNode(const Supervisor::Node& node, int signal) const auto kill_res = kill(node.pid, signal); if ( kill_res == -1 ) - fprintf(stderr, "Failed to send signal to node %s: %s", - node.Name().data(), strerror(errno)); + LogError("Failed to send signal to node %s: %s", + node.Name().data(), strerror(errno)); } void Stem::Destroy(Supervisor::Node* node) const @@ -427,8 
+449,8 @@ void Stem::Destroy(Supervisor::Node* node) const if ( Wait(node, WNOHANG) ) break; - printf("Stem waiting to destroy node: %s (%d)\n", - node->Name().data(), node->pid); + DBG_STEM("Stem waiting to destroy node: %s (%d)", + node->Name().data(), node->pid); sleep(kill_delay); } } @@ -482,8 +504,8 @@ bool Stem::Spawn(Supervisor::Node* node) if ( node_pid == -1 ) { - fprintf(stderr, "failed to fork Zeek node '%s': %s\n", - node->Name().data(), strerror(errno)); + LogError("failed to fork Zeek node '%s': %s", + node->Name().data(), strerror(errno)); return false; } @@ -495,7 +517,7 @@ bool Stem::Spawn(Supervisor::Node* node) node->pid = node_pid; node->spawn_time = std::chrono::steady_clock::now(); - printf("Stem spawned node: %s (%d)\n", node->Name().data(), node->pid); + DBG_STEM("Stem spawned node: %s (%d)", node->Name().data(), node->pid); return false; } @@ -529,7 +551,7 @@ void Stem::Shutdown(int exit_code) if ( ! nodes.empty() ) { KillNodes(sig); - printf("Stem killed nodes with signal %d\n", sig); + DBG_STEM("Stem killed nodes with signal %d", sig); usleep(10); Reap(); } @@ -539,8 +561,8 @@ void Stem::Shutdown(int exit_code) if ( nodes_alive == 0 ) exit(exit_code); - printf("Stem nodes still alive %d, sleeping for %d seconds\n", - nodes_alive, kill_delay); + DBG_STEM("Stem nodes still alive %d, sleeping for %d seconds", + nodes_alive, kill_delay); auto sleep_time_left = kill_delay; @@ -567,6 +589,44 @@ void Stem::ReportStatus(const Supervisor::Node& node) const safe_write(pipe->OutFD(), msg.data(), msg.size() + 1); } +void Stem::LogDebug(const char* format, ...) const + { + va_list args; + va_start(args, format); + auto raw_msg = fmt(format, args); + va_end(args); + + if ( getenv("ZEEK_DEBUG_STEM_STDERR") ) + { + // Useful when debugging a breaking change to the IPC mechanism itself. 
+ fprintf(stderr, "%s\n", raw_msg); + return; + } + + std::string msg = "debug "; + msg += raw_msg; + safe_write(pipe->OutFD(), msg.data(), msg.size() + 1); + } + +void Stem::LogError(const char* format, ...) const + { + va_list args; + va_start(args, format); + std::string msg = fmt(format, args); + va_end(args); + + fprintf(stderr, "%s\n", msg.data()); + + #ifdef DEBUG + if ( getenv("ZEEK_DEBUG_STEM_STDERR") ) + // Essentially already emitted above. + return; + + // Useful to also insert the error message into the debug log. + LogDebug("%s", msg.data()); + #endif + } + std::optional Stem::Run() { for ( ; ; ) @@ -591,7 +651,7 @@ std::optional Stem::Poll() { if ( errno != EINTR ) { - fprintf(stderr, "Stem poll() failed: %s\n", strerror(errno)); + LogError("Stem poll() failed: %s", strerror(errno)); return {}; } } @@ -601,7 +661,7 @@ std::optional Stem::Poll() // TODO: better way to detect loss of parent than polling ? // e.g. prctl(PR_SET_PDEATHSIG, ...) on Linux // or procctl(PROC_PDEATHSIG_CTL) on FreeBSD - printf("Stem suicide\n"); + DBG_STEM("Stem suicide"); Shutdown(13); } @@ -634,13 +694,13 @@ std::optional Stem::Poll() if ( bytes_read == 0 ) { // EOF, supervisor must have exited - printf("Stem EOF\n"); + DBG_STEM("Stem EOF"); Shutdown(14); } if ( bytes_read < 0 ) { - fprintf(stderr, "Stem read() failed: %s\n", strerror(errno)); + LogError("Stem read() failed: %s", strerror(errno)); return {}; } @@ -666,8 +726,7 @@ std::optional Stem::Poll() if ( Spawn(&node) ) return node.config; - // TODO: get stem printfs going through standard Zeek debug.log - printf("Stem created node: %s (%d)\n", node.Name().data(), node.pid); + DBG_STEM("Stem created node: %s (%d)", node.Name().data(), node.pid); ReportStatus(node); } else if ( cmd == "destroy" ) @@ -675,7 +734,7 @@ std::optional Stem::Poll() auto it = nodes.find(node_name); assert(it != nodes.end()); auto& node = it->second; - printf("Stem destroying node: %s\n", node_name.data()); + DBG_STEM("Stem destroying node: 
%s", node_name.data()); Destroy(&node); nodes.erase(it); } @@ -684,7 +743,7 @@ std::optional Stem::Poll() auto it = nodes.find(node_name); assert(it != nodes.end()); auto& node = it->second; - printf("Stem restarting node: %s\n", node_name.data()); + DBG_STEM("Stem restarting node: %s", node_name.data()); Destroy(&node); if ( Spawn(&node) ) @@ -693,7 +752,7 @@ std::optional Stem::Poll() ReportStatus(node); } else - fprintf(stderr, "unknown supervisor message: %s", cmd.data()); + LogError("Stem got unknown supervisor message: %s", cmd.data()); } return {}; diff --git a/src/Supervisor.h b/src/Supervisor.h index 8759c9004f..bcfbdfe568 100644 --- a/src/Supervisor.h +++ b/src/Supervisor.h @@ -103,6 +103,8 @@ private: void Process() override; + size_t ProcessMessages(); + void HandleChildSignal(); void ReapStem(); diff --git a/src/main.cc b/src/main.cc index b6bd67982a..6e8a34fec4 100644 --- a/src/main.cc +++ b/src/main.cc @@ -1067,17 +1067,11 @@ int main(int argc, char** argv) if ( options.debug_log_streams ) { debug_logger.EnableStreams(options.debug_log_streams->data()); - const char* debug_log_name = nullptr; - if ( ! getenv("ZEEK_DEBUG_LOG_STDERR") ) - { - if ( is_supervisor(options) ) - debug_log_name = "debug-supervisor"; - else - debug_log_name = "debug"; - } - - debug_logger.OpenDebugLog(debug_log_name); + if ( getenv("ZEEK_DEBUG_LOG_STDERR") ) + debug_logger.OpenDebugLog(nullptr); + else + debug_logger.OpenDebugLog("debug"); } #endif From 4d712d6203c65d13a0f343ab0ccb9bc475647072 Mon Sep 17 00:00:00 2001 From: Jon Siwek Date: Tue, 14 Jan 2020 13:41:46 -0800 Subject: [PATCH 26/76] Cleanup minor Supervisor TODOs e.g. Mainly making default parameter for restart/destroy/status API calls to operate on all nodes. 
--- scripts/base/frameworks/supervisor/api.zeek | 22 +-- scripts/base/frameworks/supervisor/main.zeek | 24 +-- src/Supervisor.cc | 101 ++++++++---- src/main.cc | 156 +++++++++---------- src/supervisor.bif | 12 +- 5 files changed, 178 insertions(+), 137 deletions(-) diff --git a/scripts/base/frameworks/supervisor/api.zeek b/scripts/base/frameworks/supervisor/api.zeek index d11d8a2da5..a7593b8f3e 100644 --- a/scripts/base/frameworks/supervisor/api.zeek +++ b/scripts/base/frameworks/supervisor/api.zeek @@ -39,25 +39,25 @@ export { nodes: table[string] of NodeStatus; }; - global status: function(nodes: string &default="all"): Status; global create: function(node: NodeConfig): string; - global destroy: function(nodes: string): bool; - global restart: function(nodes: string &default="all"): bool; + global status: function(node: string &default=""): Status; + global restart: function(node: string &default=""): bool; + global destroy: function(node: string &default=""): bool; global is_supervisor: function(): bool; global is_supervised: function(): bool; - global Supervisor::stop_request: event(); - - global Supervisor::status_request: event(reqid: string, nodes: string); - global Supervisor::status_response: event(reqid: string, result: Status); - global Supervisor::create_request: event(reqid: string, node: NodeConfig); global Supervisor::create_response: event(reqid: string, result: string); - global Supervisor::destroy_request: event(reqid: string, nodes: string); + global Supervisor::status_request: event(reqid: string, node: string); + global Supervisor::status_response: event(reqid: string, result: Status); + + global Supervisor::restart_request: event(reqid: string, node: string); + global Supervisor::restart_response: event(reqid: string, result: bool); + + global Supervisor::destroy_request: event(reqid: string, node: string); global Supervisor::destroy_response: event(reqid: string, result: bool); - global Supervisor::restart_request: event(reqid: string, nodes: 
string); - global Supervisor::restart_response: event(reqid: string, result: bool); + global Supervisor::stop_request: event(); } diff --git a/scripts/base/frameworks/supervisor/main.zeek b/scripts/base/frameworks/supervisor/main.zeek index 0dc93a64bc..3e42331b71 100644 --- a/scripts/base/frameworks/supervisor/main.zeek +++ b/scripts/base/frameworks/supervisor/main.zeek @@ -21,9 +21,9 @@ event Supervisor::stop_request() terminate(); } -event Supervisor::status_request(reqid: string, nodes: string) +event Supervisor::status_request(reqid: string, node: string) { - local res = Supervisor::status(nodes); + local res = Supervisor::status(node); local topic = Supervisor::topic_prefix + fmt("/status_response/%s", reqid); Broker::publish(topic, Supervisor::status_response, reqid, res); } @@ -35,23 +35,23 @@ event Supervisor::create_request(reqid: string, node: NodeConfig) Broker::publish(topic, Supervisor::create_response, reqid, res); } -event Supervisor::destroy_request(reqid: string, nodes: string) +event Supervisor::destroy_request(reqid: string, node: string) { - local res = Supervisor::destroy(nodes); + local res = Supervisor::destroy(node); local topic = Supervisor::topic_prefix + fmt("/destroy_response/%s", reqid); Broker::publish(topic, Supervisor::destroy_response, reqid, res); } -event Supervisor::restart_request(reqid: string, nodes: string) +event Supervisor::restart_request(reqid: string, node: string) { - local res = Supervisor::restart(nodes); + local res = Supervisor::restart(node); local topic = Supervisor::topic_prefix + fmt("/restart_response/%s", reqid); Broker::publish(topic, Supervisor::restart_response, reqid, res); } -function Supervisor::status(nodes: string): Status +function Supervisor::status(node: string): Status { - return Supervisor::__status(nodes); + return Supervisor::__status(node); } function Supervisor::create(node: NodeConfig): string @@ -59,14 +59,14 @@ function Supervisor::create(node: NodeConfig): string return 
Supervisor::__create(node); } -function Supervisor::destroy(nodes: string): bool +function Supervisor::destroy(node: string): bool { - return Supervisor::__destroy(nodes); + return Supervisor::__destroy(node); } -function Supervisor::restart(nodes: string): bool +function Supervisor::restart(node: string): bool { - return Supervisor::__restart(nodes); + return Supervisor::__restart(node); } function is_supervisor(): bool diff --git a/src/Supervisor.cc b/src/Supervisor.cc index 4d9f163ccb..755e6e8032 100644 --- a/src/Supervisor.cc +++ b/src/Supervisor.cc @@ -148,7 +148,6 @@ Supervisor::~Supervisor() DBG_LOG(DBG_SUPERVISOR, "shutdown, killing stem process %d", stem_pid); - // TODO: is signal the best way to trigger shutdown of decendent processes? auto kill_res = kill(stem_pid, SIGTERM); if ( kill_res == -1 ) @@ -232,7 +231,6 @@ void Supervisor::HandleChildSignal() return; // Revive the Stem process - // TODO: Stem process needs a way to inform Supervisor not to revive stem_pid = fork(); if ( stem_pid == -1 ) @@ -280,10 +278,13 @@ void Supervisor::HandleChildSignal() DBG_LOG(DBG_SUPERVISOR, "stem process revived, new pid: %d", stem_pid); // Parent supervisor process resends node configurations to recreate - // the desired process hierarchy + // the desired process hierarchy. - // TODO: probably a preferred order in which to create nodes - // e.g. logger, manager, proxy, worker + // Note: there's probably a preferred order in which to create nodes. + // E.g. logger, manager, proxy, worker. However, fully synchronizing + // a startup order like that is slow and complicated: essentially have + // to wait for each process to start up and reach the point just after + // it starts listening (and maybe that never happens for some error case). 
for ( const auto& n : nodes ) { const auto& node = n.second; @@ -355,14 +356,16 @@ Stem::Stem(std::unique_ptr p) setsignal(SIGCHLD, stem_sig_handler); setsignal(SIGTERM, stem_sig_handler); - // TODO: changing the process group here so that SIGINT to the - // supervisor doesn't also get passed to the children. i.e. supervisor - // should be in charge of initiating orderly shutdown. But calling - // just setpgid() like this is technically a race-condition -- need - // to do more work of blocking SIGINT before fork(), unblocking after, - // then also calling setpgid() from parent. And just not doing that - // until more is known whether that's the right SIGINT behavior in - // the first place. + // Note: changing the process group here so that SIGINT to the supervisor + // doesn't also get passed to the children. I.e. the supervisor should be + // in charge of initiating orderly shutdown of the process tree. + // Technically calling setpgid() like this is a race-condition (if we get a + // SIGINT in between the fork() and setpgid() calls), but can treat that as + // mostly harmless since the only affected node in the process tree at + // that point will be this Stem process and the Supervisor *should* do the + // right thing if it also sees SIGINT with the Stem already having exited + // (since that same type of situation with the Stem dying prematurely can + // happen for any arbitrary reason, not just for SIGINT). auto res = setpgid(0, 0); if ( res == -1 ) @@ -407,7 +410,6 @@ bool Stem::Wait(Supervisor::Node* node, int options) const if ( WIFEXITED(status) ) { node->exit_status = WEXITSTATUS(status); - // TODO: may be some cases where the node is intended to exit DBG_STEM("node '%s' exited with status %d", node->Name().data(), node->exit_status); } @@ -709,7 +711,6 @@ std::optional Stem::Poll() for ( auto& msg : msgs ) { - // TODO: improve message format ...
std::vector msg_tokens; tokenize_string(std::move(msg), " ", &msg_tokens, 2); const auto& cmd = msg_tokens[0]; @@ -1035,16 +1036,31 @@ void Supervisor::NodeConfig::InitCluster() RecordVal* Supervisor::Status(std::string_view node_name) { - // TODO: handle node classes auto rval = new RecordVal(BifType::Record::Supervisor::Status); auto tt = BifType::Record::Supervisor::Status->FieldType("nodes"); auto node_table_val = new TableVal(tt->AsTableType()); rval->Assign(0, node_table_val); - for ( const auto& n : nodes ) + if ( node_name.empty() ) { - const auto& name = n.first; - const auto& node = n.second; + for ( const auto& n : nodes ) + { + const auto& name = n.first; + const auto& node = n.second; + auto key = make_intrusive(name); + auto val = node.ToRecord(); + node_table_val->Assign(key.get(), val.detach()); + } + } + else + { + auto it = nodes.find(node_name); + + if ( it == nodes.end() ) + return rval; + + const auto& name = it->first; + const auto& node = it->second; auto key = make_intrusive(name); auto val = node.ToRecord(); node_table_val->Assign(key.get(), val.detach()); @@ -1061,6 +1077,9 @@ std::string Supervisor::Create(const RecordVal* node_val) std::string Supervisor::Create(const Supervisor::NodeConfig& node) { + if ( node.name.empty() ) + return "node names must not be an empty string"; + if ( node.name.find(' ') != std::string::npos ) return fmt("node names must not contain spaces: '%s'", node.name.data()); @@ -1085,7 +1104,22 @@ std::string Supervisor::Create(const Supervisor::NodeConfig& node) bool Supervisor::Destroy(std::string_view node_name) { - // TODO: handle node classes + auto send_destroy_msg = [this](std::string_view name) + { + std::stringstream ss; + ss << "destroy " << name; + std::string msg = ss.str(); + safe_write(stem_pipe->OutFD(), msg.data(), msg.size() + 1); + }; + + if ( node_name.empty() ) + { + for ( const auto& n : nodes ) + send_destroy_msg(n.first); + + nodes.clear(); + return true; + } auto it = nodes.find(node_name); 
@@ -1093,24 +1127,31 @@ bool Supervisor::Destroy(std::string_view node_name) return false; nodes.erase(it); - - std::stringstream ss; - ss << "destroy " << node_name; - std::string msg = ss.str(); - safe_write(stem_pipe->OutFD(), msg.data(), msg.size() + 1); + send_destroy_msg(node_name); return true; } bool Supervisor::Restart(std::string_view node_name) { - // TODO: handle node classes + auto send_restart_msg = [this](std::string_view name) + { + std::stringstream ss; + ss << "restart " << name; + std::string msg = ss.str(); + safe_write(stem_pipe->OutFD(), msg.data(), msg.size() + 1); + }; + + if ( node_name.empty() ) + { + for ( const auto& n : nodes ) + send_restart_msg(n.first); + + return true; + } if ( nodes.find(node_name) == nodes.end() ) return false; - std::stringstream ss; - ss << "restart " << node_name; - std::string msg = ss.str(); - safe_write(stem_pipe->OutFD(), msg.data(), msg.size() + 1); + send_restart_msg(node_name); return true; } diff --git a/src/main.cc b/src/main.cc index 6e8a34fec4..6929067975 100644 --- a/src/main.cc +++ b/src/main.cc @@ -284,9 +284,8 @@ struct zeek_options { /** * Inherit certain options set in the original supervisor parent process * and discard the rest. - * @param node the supervised-node whose Zeek options are to be modified. 
*/ - void filter_supervised_node_options(const zeek::Supervisor::NodeConfig& node) + void filter_supervised_node_options() { auto og = *this; *this = {}; @@ -327,6 +326,82 @@ struct zeek_options { } }; +static void init_supervised_node(zeek_options* options) + { + const auto& node_name = zeek::supervised_node->name; + + if ( zeek::supervised_node->directory ) + { + if ( chdir(zeek::supervised_node->directory->data()) ) + { + fprintf(stderr, "node '%s' failed to chdir to %s: %s\n", + node_name.data(), + zeek::supervised_node->directory->data(), + strerror(errno)); + exit(1); + } + } + + if ( zeek::supervised_node->stderr_file ) + { + auto fd = open(zeek::supervised_node->stderr_file->data(), + O_WRONLY | O_CREAT | O_TRUNC | O_APPEND | O_CLOEXEC, + 0600); + + if ( fd == -1 || dup2(fd, STDERR_FILENO) == -1 ) + { + fprintf(stderr, "node '%s' failed to create stderr file %s: %s\n", + node_name.data(), + zeek::supervised_node->stderr_file->data(), + strerror(errno)); + exit(1); + } + } + + if ( zeek::supervised_node->stdout_file ) + { + auto fd = open(zeek::supervised_node->stdout_file->data(), + O_WRONLY | O_CREAT | O_TRUNC | O_APPEND | O_CLOEXEC, + 0600); + + if ( fd == -1 || dup2(fd, STDOUT_FILENO) == -1 ) + { + fprintf(stderr, "node '%s' failed to create stdout file %s: %s\n", + node_name.data(), + zeek::supervised_node->stdout_file->data(), + strerror(errno)); + exit(1); + } + } + + if ( zeek::supervised_node->cpu_affinity ) + { + auto res = zeek::set_affinity(*zeek::supervised_node->cpu_affinity); + + if ( ! res ) + fprintf(stderr, "node '%s' failed to set CPU affinity: %s\n", + node_name.data(), strerror(errno)); + } + + options->filter_supervised_node_options(); + + if ( zeek::supervised_node->interface ) + options->interfaces.emplace_back(*zeek::supervised_node->interface); + + if ( ! 
zeek::supervised_node->cluster.empty() ) + { + if ( setenv("CLUSTER_NODE", node_name.data(), true) == -1 ) + { + fprintf(stderr, "node '%s' failed to setenv: %s\n", + node_name.data(), strerror(errno)); + exit(1); + } + } + + for ( const auto& s : zeek::supervised_node->scripts ) + options->scripts_to_load.emplace_back(s); + } + static std::vector to_cargs(const std::vector& args) { std::vector rval; @@ -933,82 +1008,7 @@ int main(int argc, char** argv) } if ( zeek::supervised_node ) - { - // TODO: probably all of this block could move to a new - // zeek::supervised_node->Init(options) method - const auto& node_name = zeek::supervised_node->name; - - if ( zeek::supervised_node->directory ) - { - if ( chdir(zeek::supervised_node->directory->data()) ) - { - fprintf(stderr, "node '%s' failed to chdir to %s: %s\n", - node_name.data(), - zeek::supervised_node->directory->data(), - strerror(errno)); - exit(1); - } - } - - if ( zeek::supervised_node->stderr_file ) - { - auto fd = open(zeek::supervised_node->stderr_file->data(), - O_WRONLY | O_CREAT | O_TRUNC | O_APPEND | O_CLOEXEC, - 0600); - - if ( fd == -1 || dup2(fd, STDERR_FILENO) == -1 ) - { - fprintf(stderr, "node '%s' failed to create stderr file %s: %s\n", - node_name.data(), - zeek::supervised_node->stderr_file->data(), - strerror(errno)); - exit(1); - } - } - - if ( zeek::supervised_node->stdout_file ) - { - auto fd = open(zeek::supervised_node->stdout_file->data(), - O_WRONLY | O_CREAT | O_TRUNC | O_APPEND | O_CLOEXEC, - 0600); - - if ( fd == -1 || dup2(fd, STDOUT_FILENO) == -1 ) - { - fprintf(stderr, "node '%s' failed to create stdout file %s: %s\n", - node_name.data(), - zeek::supervised_node->stdout_file->data(), - strerror(errno)); - exit(1); - } - } - - if ( zeek::supervised_node->cpu_affinity ) - { - auto res = zeek::set_affinity(*zeek::supervised_node->cpu_affinity); - - if ( ! 
res ) - fprintf(stderr, "node '%s' failed to set CPU affinity: %s\n", - node_name.data(), strerror(errno)); - } - - options.filter_supervised_node_options(*zeek::supervised_node); - - if ( zeek::supervised_node->interface ) - options.interfaces.emplace_back(*zeek::supervised_node->interface); - - if ( ! zeek::supervised_node->cluster.empty() ) - { - if ( setenv("CLUSTER_NODE", node_name.data(), true) == -1 ) - { - fprintf(stderr, "node '%s' failed to setenv: %s\n", - node_name.data(), strerror(errno)); - exit(1); - } - } - - for ( const auto& s : zeek::supervised_node->scripts ) - options.scripts_to_load.emplace_back(s); - } + init_supervised_node(&options); double time_start = current_time(true); diff --git a/src/supervisor.bif b/src/supervisor.bif index 41f99c6e73..6990bf8385 100644 --- a/src/supervisor.bif +++ b/src/supervisor.bif @@ -19,7 +19,7 @@ type Supervisor::Status: record; type Supervisor::NodeConfig: record; type Supervisor::NodeStatus: record; -function Supervisor::__status%(nodes: string%): Supervisor::Status +function Supervisor::__status%(node: string%): Supervisor::Status %{ if ( ! zeek::supervisor ) { @@ -27,7 +27,7 @@ function Supervisor::__status%(nodes: string%): Supervisor::Status return new RecordVal(BifType::Record::Supervisor::Status); } - return zeek::supervisor->Status(nodes->CheckString()); + return zeek::supervisor->Status(node->CheckString()); %} function Supervisor::__create%(node: Supervisor::NodeConfig%): string @@ -42,7 +42,7 @@ function Supervisor::__create%(node: Supervisor::NodeConfig%): string return new StringVal(rval); %} -function Supervisor::__destroy%(nodes: string%): bool +function Supervisor::__destroy%(node: string%): bool %{ if ( ! 
zeek::supervisor ) { @@ -50,11 +50,11 @@ function Supervisor::__destroy%(nodes: string%): bool return val_mgr->GetBool(false); } - auto rval = zeek::supervisor->Destroy(nodes->CheckString()); + auto rval = zeek::supervisor->Destroy(node->CheckString()); return val_mgr->GetBool(rval); %} -function Supervisor::__restart%(nodes: string%): bool +function Supervisor::__restart%(node: string%): bool %{ if ( ! zeek::supervisor ) { @@ -62,7 +62,7 @@ function Supervisor::__restart%(nodes: string%): bool return val_mgr->GetBool(false); } - auto rval = zeek::supervisor->Restart(nodes->CheckString()); + auto rval = zeek::supervisor->Restart(node->CheckString()); return val_mgr->GetBool(rval); %} From 3e1a9ebec37bbb6c5d01fe74bf804402999132a3 Mon Sep 17 00:00:00 2001 From: Jon Siwek Date: Tue, 14 Jan 2020 17:33:37 -0800 Subject: [PATCH 27/76] Remove unused supervisor config options Since those related to offline pcap reading are not implemented yet. --- src/Supervisor.cc | 1 - src/Supervisor.h | 2 -- src/main.cc | 20 ++++++++++---------- 3 files changed, 10 insertions(+), 13 deletions(-) diff --git a/src/Supervisor.cc b/src/Supervisor.cc index 755e6e8032..4de9e88267 100644 --- a/src/Supervisor.cc +++ b/src/Supervisor.cc @@ -131,7 +131,6 @@ Supervisor::Supervisor(Supervisor::Config cfg, : config(std::move(cfg)), stem_pid(arg_stem_pid), stem_pipe(std::move(pipe)) { DBG_LOG(DBG_SUPERVISOR, "forked stem process %d", stem_pid); - DBG_LOG(DBG_SUPERVISOR, "using %d workers", config.num_workers); setsignal(SIGCHLD, supervisor_sig_handler); SetIdle(true); } diff --git a/src/Supervisor.h b/src/Supervisor.h index bcfbdfe568..2d74164e52 100644 --- a/src/Supervisor.h +++ b/src/Supervisor.h @@ -23,8 +23,6 @@ class Supervisor : public iosource::IOSource { public: struct Config { - int num_workers = 1; - std::vector pcaps; std::string zeek_exe_path; }; diff --git a/src/main.cc b/src/main.cc index 6929067975..376dbfb77d 100644 --- a/src/main.cc +++ b/src/main.cc @@ -235,13 +235,13 @@ struct 
zeek_options { std::optional script_code_to_exec; std::vector script_prefixes = { "" }; // "" = "no prefix" - int supervised_workers = 0; int signature_re_level = 4; bool ignore_checksums = false; bool use_watchdog = false; double pseudo_realtime = 0; DNS_MgrMode dns_mode = DNS_DEFAULT; + bool supervisor_mode = false; bool parse_only = false; bool bare_mode = false; bool debug_scripts = false; @@ -556,9 +556,13 @@ static zeek_options parse_cmdline(int argc, char** argv) rval.interfaces.emplace_back(optarg); break; case 'j': - rval.supervised_workers = 1; + rval.supervisor_mode = true; if ( optarg ) - rval.supervised_workers = atoi(optarg); + { + // TODO: for supervised offline pcap reading, the argument is + // expected to be number of workers like "-j 4" or possibly a + // list of worker/proxy/logger counts like "-j 4,2,1" + } break; case 'p': rval.script_prefixes.emplace_back(optarg); @@ -967,8 +971,6 @@ int main(int argc, char** argv) pid_t stem_pid = 0; std::unique_ptr supervisor_pipe; auto zeek_stem_env = getenv("ZEEK_STEM"); - auto is_supervisor = [](const zeek_options& os) -> bool - { return os.supervised_workers > 0; }; if ( zeek_stem_env ) { @@ -990,7 +992,7 @@ int main(int argc, char** argv) supervisor_pipe.reset(new bro::PipePair{FD_CLOEXEC, O_NONBLOCK, fds}); zeek::supervised_node = zeek::Supervisor::RunStem(std::move(supervisor_pipe)); } - else if ( is_supervisor(options) ) + else if ( options.supervisor_mode ) { // TODO: the SIGCHLD handler should be set before fork() supervisor_pipe.reset(new bro::PipePair{FD_CLOEXEC, O_NONBLOCK}); @@ -1075,11 +1077,9 @@ int main(int argc, char** argv) } #endif - if ( is_supervisor(options) ) + if ( options.supervisor_mode ) { zeek::Supervisor::Config cfg = {}; - cfg.pcaps = options.pcap_files; - cfg.num_workers = options.supervised_workers; cfg.zeek_exe_path = zeek_exe_path; options.filter_supervisor_options(); zeek::supervisor = new zeek::Supervisor(std::move(cfg), @@ -1139,7 +1139,7 @@ int main(int argc, char** 
argv) options.interfaces.size() == 0 && ! options.identifier_to_print && ! command_line_policy && ! options.print_plugins && - ! is_supervisor(options) && ! zeek::supervised_node ) + ! options.supervisor_mode && ! zeek::supervised_node ) add_input_file("-"); for ( const auto& script_option : options.script_options_to_set ) From f5b3673890a408b5468a2a88316762689ef66897 Mon Sep 17 00:00:00 2001 From: Jon Siwek Date: Tue, 14 Jan 2020 18:56:34 -0800 Subject: [PATCH 28/76] Improve supervisor signal handler safety Now should only be making async-signal-safe calls --- src/Flare.cc | 13 ++++++++----- src/Flare.h | 8 ++++++-- src/Supervisor.cc | 33 ++++++++++++++++++++++----------- src/Supervisor.h | 3 ++- 4 files changed, 38 insertions(+), 19 deletions(-) diff --git a/src/Flare.cc b/src/Flare.cc index 4229ffb63c..166917d914 100644 --- a/src/Flare.cc +++ b/src/Flare.cc @@ -13,8 +13,11 @@ Flare::Flare() { } -static void bad_pipe_op(const char* which) +static void bad_pipe_op(const char* which, bool signal_safe) { + if ( signal_safe ) + abort(); + char buf[256]; bro_strerror_r(errno, buf, sizeof(buf)); @@ -27,7 +30,7 @@ static void bad_pipe_op(const char* which) } } -void Flare::Fire() +void Flare::Fire(bool signal_safe) { char tmp = 0; @@ -49,14 +52,14 @@ void Flare::Fire() // Interrupted: try again. continue; - bad_pipe_op("write"); + bad_pipe_op("write", signal_safe); } // No error, but didn't write a byte: try again. } } -int Flare::Extinguish() +int Flare::Extinguish(bool signal_safe) { int rval = 0; char tmp[256]; @@ -80,7 +83,7 @@ int Flare::Extinguish() // Interrupted: try again. continue; - bad_pipe_op("read"); + bad_pipe_op("read", signal_safe); } return rval; diff --git a/src/Flare.h b/src/Flare.h index ebe902c172..4c781387bf 100644 --- a/src/Flare.h +++ b/src/Flare.h @@ -26,15 +26,19 @@ public: /** * Put the object in the "ready" state. 
+ * @param signal_safe whether to skip error-reporting functionality that + * is not async-signal-safe */ - void Fire(); + void Fire(bool signal_safe = false); /** * Take the object out of the "ready" state. + * @param signal_safe whether to skip error-reporting functionality that + * is not async-signal-safe * @return number of bytes read from the pipe, corresponds to the number * of times Fire() was called. */ - int Extinguish(); + int Extinguish(bool signal_safe = false); private: Pipe pipe; diff --git a/src/Supervisor.cc b/src/Supervisor.cc index 4de9e88267..4c4484029e 100644 --- a/src/Supervisor.cc +++ b/src/Supervisor.cc @@ -65,6 +65,7 @@ struct Stem { void LogError(const char* format, ...) const __attribute__((format(printf, 2, 3))); + int last_signal = -1; std::unique_ptr signal_flare; std::unique_ptr pipe; std::map nodes; @@ -77,13 +78,12 @@ static Stem* stem = nullptr; static RETSIGTYPE stem_sig_handler(int signo) { - // TODO: signal safety - DBG_STEM("Stem received signal: %d", signo); + stem->last_signal = signo; if ( stem->shutting_down ) return RETSIGVAL; - stem->signal_flare->Fire(); + stem->signal_flare->Fire(true); if ( signo == SIGTERM ) stem->shutting_down = true; @@ -93,9 +93,7 @@ static RETSIGTYPE stem_sig_handler(int signo) static RETSIGTYPE supervisor_sig_handler(int signo) { - // TODO: signal safety - DBG_LOG(DBG_SUPERVISOR, "received signal: %d", signo); - supervisor->ObserveChildSignal(); + supervisor->ObserveChildSignal(signo); return RETSIGVAL; } @@ -126,8 +124,8 @@ static std::string make_create_message(const Supervisor::NodeConfig& node) } Supervisor::Supervisor(Supervisor::Config cfg, - std::unique_ptr pipe, - pid_t arg_stem_pid) + std::unique_ptr pipe, + pid_t arg_stem_pid) : config(std::move(cfg)), stem_pid(arg_stem_pid), stem_pipe(std::move(pipe)) { DBG_LOG(DBG_SUPERVISOR, "forked stem process %d", stem_pid); @@ -171,9 +169,10 @@ Supervisor::~Supervisor() while ( ProcessMessages() != 0 ); } -void Supervisor::ObserveChildSignal() 
+void Supervisor::ObserveChildSignal(int signo) { - signal_flare.Fire(); + last_signal = signo; + signal_flare.Fire(true); } void Supervisor::ReapStem() @@ -216,13 +215,19 @@ void Supervisor::ReapStem() void Supervisor::HandleChildSignal() { + if ( last_signal >= 0 ) + { + DBG_LOG(DBG_SUPERVISOR, "Supervisor received signal %d", last_signal); + last_signal = -1; + } + bool had_child_signal = signal_flare.Extinguish(); if ( had_child_signal ) { ReapStem(); - DBG_LOG(DBG_SUPERVISOR, "processed SIGCHLD %s", + DBG_LOG(DBG_SUPERVISOR, "Supervisor processed child signal %s", stem_pid ? "(spurious)" : ""); } @@ -657,6 +662,12 @@ std::optional Stem::Poll() } } + if ( last_signal >= 0 ) + { + DBG_STEM("Stem received signal: %d", last_signal); + last_signal = -1; + } + if ( getppid() == 1 ) { // TODO: better way to detect loss of parent than polling ? diff --git a/src/Supervisor.h b/src/Supervisor.h index 2d74164e52..01429e6508 100644 --- a/src/Supervisor.h +++ b/src/Supervisor.h @@ -80,7 +80,7 @@ public: pid_t StemPID() const { return stem_pid; } - void ObserveChildSignal(); + void ObserveChildSignal(int signo); RecordVal* Status(std::string_view node_name); std::string Create(const RecordVal* node); @@ -113,6 +113,7 @@ private: Config config; pid_t stem_pid; std::unique_ptr stem_pipe; + int last_signal = -1; bro::Flare signal_flare; NodeMap nodes; std::string msg_buffer; From 899a987527f6c316171991430fff9801030dd166 Mon Sep 17 00:00:00 2001 From: Jon Siwek Date: Tue, 14 Jan 2020 20:14:03 -0800 Subject: [PATCH 29/76] Improve handling of premature supervisor stem exit i.e. if the stem process terminates before the supervisor registers a SIGCHLD handler. 
--- src/Supervisor.cc | 37 ++++++++++++++++++++++++++++++++----- src/main.cc | 1 - 2 files changed, 32 insertions(+), 6 deletions(-) diff --git a/src/Supervisor.cc b/src/Supervisor.cc index 4c4484029e..8bf659d011 100644 --- a/src/Supervisor.cc +++ b/src/Supervisor.cc @@ -76,7 +76,7 @@ struct Stem { static Stem* stem = nullptr; -static RETSIGTYPE stem_sig_handler(int signo) +static RETSIGTYPE stem_signal_handler(int signo) { stem->last_signal = signo; @@ -91,7 +91,7 @@ static RETSIGTYPE stem_sig_handler(int signo) return RETSIGVAL; } -static RETSIGTYPE supervisor_sig_handler(int signo) +static RETSIGTYPE supervisor_signal_handler(int signo) { supervisor->ObserveChildSignal(signo); return RETSIGVAL; @@ -129,8 +129,33 @@ Supervisor::Supervisor(Supervisor::Config cfg, : config(std::move(cfg)), stem_pid(arg_stem_pid), stem_pipe(std::move(pipe)) { DBG_LOG(DBG_SUPERVISOR, "forked stem process %d", stem_pid); - setsignal(SIGCHLD, supervisor_sig_handler); + setsignal(SIGCHLD, supervisor_signal_handler); SetIdle(true); + + int status; + auto res = waitpid(stem_pid, &status, WNOHANG); + + if ( res == 0 ) + // Good, stem process is alive and the SIGCHLD handler will keep it so. 
+ return; + + if ( res == -1 ) + fprintf(stderr, "Supervisor failed to get status of stem process: %s\n", + strerror(errno)); + else + { + if ( WIFEXITED(status) ) + fprintf(stderr, "Supervisor stem died early with exit code %d\n", + WEXITSTATUS(status)); + else if ( WIFSIGNALED(status) ) + fprintf(stderr, "Supervisor stem died early by signal %d\n", + WTERMSIG(status)); + else + fprintf(stderr, "Supervisor stem died early for unknown reason\n", + WTERMSIG(status)); + } + + exit(1); } Supervisor::~Supervisor() @@ -357,8 +382,8 @@ Stem::Stem(std::unique_ptr p) zeek::set_thread_name("zeek.stem"); pipe->Swap(); stem = this; - setsignal(SIGCHLD, stem_sig_handler); - setsignal(SIGTERM, stem_sig_handler); + setsignal(SIGCHLD, stem_signal_handler); + setsignal(SIGTERM, stem_signal_handler); // Note: changing the process group here so that SIGINT to the supervisor // doesn't also get passed to the children. I.e. the supervisor should be @@ -517,6 +542,8 @@ bool Stem::Spawn(Supervisor::Node* node) if ( node_pid == 0 ) { + setsignal(SIGCHLD, SIG_DFL); + setsignal(SIGTERM, SIG_DFL); zeek::set_thread_name(fmt("zeek.%s", node->Name().data())); return true; } diff --git a/src/main.cc b/src/main.cc index 376dbfb77d..a5c23c5a87 100644 --- a/src/main.cc +++ b/src/main.cc @@ -994,7 +994,6 @@ int main(int argc, char** argv) } else if ( options.supervisor_mode ) { - // TODO: the SIGCHLD handler should be set before fork() supervisor_pipe.reset(new bro::PipePair{FD_CLOEXEC, O_NONBLOCK}); stem_pid = fork(); From 7ddd311583e42e091781af7c1c7c503df118f5e1 Mon Sep 17 00:00:00 2001 From: Jon Siwek Date: Wed, 15 Jan 2020 14:24:53 -0800 Subject: [PATCH 30/76] Improve supervisor checks for parent process termination Comparing parent process ID to 1 to detect loss of parent process was not necessarily portable, so now it stores parent PID pre-fork and then monitors for any change. 
--- src/Net.cc | 10 +++++- src/Supervisor.cc | 82 +++++++++++++++++++++++++++++----------------- src/Supervisor.h | 13 ++++++-- src/main.cc | 54 +++++++++++++++--------------- src/supervisor.bif | 7 ++-- 5 files changed, 101 insertions(+), 65 deletions(-) diff --git a/src/Net.cc b/src/Net.cc index 1009f5d3de..481804ce4c 100644 --- a/src/Net.cc +++ b/src/Net.cc @@ -290,7 +290,15 @@ void net_run() while ( iosource_mgr->Size() || (BifConst::exit_only_after_terminate && ! terminating) ) { - if ( zeek::supervised_node && getppid() == 1 ) + // Note: only simple + portable way of detecting loss of parent + // process seems to be polling for change in PPID. There's platform + // specific ways if we do end up needing something more responsive + // and/or have to avoid overhead of polling, but maybe not worth + // the additional complexity: + // Linux: prctl(PR_SET_PDEATHSIG, ...) + // FreeBSD: procctl(PROC_PDEATHSIG_CTL) + // TODO: make this a proper timer + if ( zeek::supervised_node && zeek::supervised_node->parent_pid != getppid() ) zeek_terminate_loop("supervised cluster node was orphaned"); double ts; diff --git a/src/Supervisor.cc b/src/Supervisor.cc index 8bf659d011..4899a65f8c 100644 --- a/src/Supervisor.cc +++ b/src/Supervisor.cc @@ -33,19 +33,19 @@ using namespace zeek; namespace { struct Stem { - Stem(std::unique_ptr p); + Stem(std::unique_ptr p, pid_t parent_pid); ~Stem(); - std::optional Run(); + std::optional Run(); - std::optional Poll(); + std::optional Poll(); - std::optional Revive(); + std::optional Revive(); void Reap(); - bool Spawn(Supervisor::Node* node); + std::optional Spawn(Supervisor::Node* node); int AliveNodeCount() const; @@ -65,6 +65,7 @@ struct Stem { void LogError(const char* format, ...) 
const __attribute__((format(printf, 2, 3))); + pid_t parent_pid; int last_signal = -1; std::unique_ptr signal_flare; std::unique_ptr pipe; @@ -260,6 +261,7 @@ void Supervisor::HandleChildSignal() return; // Revive the Stem process + auto stem_ppid = getpid(); stem_pid = fork(); if ( stem_pid == -1 ) @@ -277,7 +279,7 @@ void Supervisor::HandleChildSignal() if ( stem_pid == 0 ) { // Child stem process needs to exec() - auto stem_env = fmt("%d,%d,%d,%d", + auto stem_env = fmt("%d,%d,%d,%d,%d", stem_ppid, stem_pipe->In().ReadFD(), stem_pipe->In().WriteFD(), stem_pipe->Out().ReadFD(), stem_pipe->Out().WriteFD()); @@ -376,8 +378,8 @@ size_t Supervisor::ProcessMessages() return msgs.size(); } -Stem::Stem(std::unique_ptr p) - : signal_flare(new bro::Flare()), pipe(std::move(p)) +Stem::Stem(std::unique_ptr p, pid_t ppid) + : parent_pid(ppid), signal_flare(new bro::Flare()), pipe(std::move(p)) { zeek::set_thread_name("zeek.stem"); pipe->Swap(); @@ -486,7 +488,7 @@ void Stem::Destroy(Supervisor::Node* node) const } } -std::optional Stem::Revive() +std::optional Stem::Revive() { constexpr auto attempts_before_delay_increase = 3; constexpr auto delay_increase_factor = 2; @@ -520,8 +522,10 @@ std::optional Stem::Revive() if ( node.revival_attempts % attempts_before_delay_increase == 0 ) node.revival_delay *= delay_increase_factor; - if ( Spawn(&node) ) - return node.config; + auto sn = Spawn(&node); + + if ( sn ) + return sn; ReportStatus(node); } @@ -529,15 +533,16 @@ std::optional Stem::Revive() return {}; } -bool Stem::Spawn(Supervisor::Node* node) +std::optional Stem::Spawn(Supervisor::Node* node) { + auto ppid = getpid(); auto node_pid = fork(); if ( node_pid == -1 ) { LogError("failed to fork Zeek node '%s': %s", node->Name().data(), strerror(errno)); - return false; + return {}; } if ( node_pid == 0 ) @@ -545,13 +550,16 @@ bool Stem::Spawn(Supervisor::Node* node) setsignal(SIGCHLD, SIG_DFL); setsignal(SIGTERM, SIG_DFL); zeek::set_thread_name(fmt("zeek.%s", 
node->Name().data())); - return true; + Supervisor::SupervisedNode rval; + rval.config = node->config; + rval.parent_pid = ppid; + return rval; } node->pid = node_pid; node->spawn_time = std::chrono::steady_clock::now(); DBG_STEM("Stem spawned node: %s (%d)", node->Name().data(), node->pid); - return false; + return {}; } int Stem::AliveNodeCount() const @@ -660,7 +668,7 @@ void Stem::LogError(const char* format, ...) const #endif } -std::optional Stem::Run() +std::optional Stem::Run() { for ( ; ; ) { @@ -673,10 +681,12 @@ std::optional Stem::Run() return {}; } -std::optional Stem::Poll() +std::optional Stem::Poll() { pollfd fds[2] = { { pipe->InFD(), POLLIN, 0 }, { signal_flare->FD(), POLLIN, 0} }; + // Note: the poll timeout here is for periodically checking if the parent + // process died (see below). constexpr auto poll_timeout_ms = 1000; auto res = poll(fds, 2, poll_timeout_ms); @@ -695,11 +705,15 @@ std::optional Stem::Poll() last_signal = -1; } - if ( getppid() == 1 ) + if ( getppid() != parent_pid ) { - // TODO: better way to detect loss of parent than polling ? - // e.g. prctl(PR_SET_PDEATHSIG, ...) on Linux - // or procctl(PROC_PDEATHSIG_CTL) on FreeBSD + // Note: only simple + portable way of detecting loss of parent + // process seems to be polling for change in PPID. There's platform + // specific ways if we do end up needing something more responsive + // and/or have to avoid overhead of polling, but maybe not worth + // the additional complexity: + // Linux: prctl(PR_SET_PDEATHSIG, ...) 
+ // FreeBSD: procctl(PROC_PDEATHSIG_CTL) DBG_STEM("Stem suicide"); Shutdown(13); } @@ -761,8 +775,10 @@ std::optional Stem::Poll() auto it = nodes.emplace(node_name, std::move(node_config)).first; auto& node = it->second; - if ( Spawn(&node) ) - return node.config; + auto sn = Spawn(&node); + + if ( sn ) + return sn; DBG_STEM("Stem created node: %s (%d)", node.Name().data(), node.pid); ReportStatus(node); @@ -784,8 +800,10 @@ std::optional Stem::Poll() DBG_STEM("Stem restarting node: %s", node_name.data()); Destroy(&node); - if ( Spawn(&node) ) - return node.config; + auto sn = Spawn(&node); + + if ( sn ) + return sn; ReportStatus(node); } @@ -796,9 +814,9 @@ std::optional Stem::Poll() return {}; } -std::optional Supervisor::RunStem(std::unique_ptr pipe) +std::optional Supervisor::RunStem(std::unique_ptr pipe, pid_t parent_pid) { - Stem s(std::move(pipe)); + Stem s(std::move(pipe), parent_pid); return s.Run(); } @@ -1028,8 +1046,11 @@ static Val* supervisor_role_to_cluster_node_type(BifEnum::Supervisor::ClusterRol } } -void Supervisor::NodeConfig::InitCluster() +bool Supervisor::SupervisedNode::InitCluster() { + if ( supervised_node->config.cluster.empty() ) + return false; + auto cluster_node_type = global_scope()->Lookup("Cluster::Node")->AsType()->AsRecordType(); auto cluster_nodes_id = global_scope()->Lookup("Cluster::nodes"); auto cluster_manager_is_logger_id = global_scope()->Lookup("Cluster::manager_is_logger"); @@ -1037,7 +1058,7 @@ void Supervisor::NodeConfig::InitCluster() auto has_logger = false; std::optional manager_name; - for ( const auto& e : supervised_node->cluster ) + for ( const auto& e : supervised_node->config.cluster ) { if ( e.second.role == BifEnum::Supervisor::MANAGER ) manager_name = e.first; @@ -1045,7 +1066,7 @@ void Supervisor::NodeConfig::InitCluster() has_logger = true; } - for ( const auto& e : supervised_node->cluster ) + for ( const auto& e : supervised_node->config.cluster ) { const auto& node_name = e.first; const auto& ep = 
e.second; @@ -1069,6 +1090,7 @@ void Supervisor::NodeConfig::InitCluster() } cluster_manager_is_logger_id->SetVal(val_mgr->GetBool(! has_logger)); + return true; } RecordVal* Supervisor::Status(std::string_view node_name) diff --git a/src/Supervisor.h b/src/Supervisor.h index 01429e6508..a8a5429f87 100644 --- a/src/Supervisor.h +++ b/src/Supervisor.h @@ -34,7 +34,6 @@ public: }; struct NodeConfig { - static void InitCluster(); static NodeConfig FromRecord(const RecordVal* node_val); static NodeConfig FromJSON(std::string_view json); @@ -51,6 +50,13 @@ public: std::map cluster; }; + struct SupervisedNode { + static bool InitCluster(); + + NodeConfig config; + pid_t parent_pid; + }; + struct Node { IntrusivePtr ToRecord() const; @@ -69,7 +75,8 @@ public: std::chrono::time_point spawn_time; }; - static std::optional RunStem(std::unique_ptr pipe); + static std::optional RunStem(std::unique_ptr pipe, + pid_t parent_pid); using NodeMap = std::map>; @@ -120,6 +127,6 @@ private: }; extern Supervisor* supervisor; -extern std::optional supervised_node; +extern std::optional supervised_node; } // namespace zeek diff --git a/src/main.cc b/src/main.cc index a5c23c5a87..6cd22b5ad2 100644 --- a/src/main.cc +++ b/src/main.cc @@ -100,7 +100,7 @@ zeekygen::Manager* zeekygen_mgr = 0; iosource::Manager* iosource_mgr = 0; bro_broker::Manager* broker_mgr = 0; zeek::Supervisor* zeek::supervisor = 0; -std::optional zeek::supervised_node; +std::optional zeek::supervised_node; std::vector zeek_script_prefixes; Stmt* stmts; @@ -328,55 +328,53 @@ struct zeek_options { static void init_supervised_node(zeek_options* options) { - const auto& node_name = zeek::supervised_node->name; + const auto& config = zeek::supervised_node->config; + const auto& node_name = config.name; - if ( zeek::supervised_node->directory ) + if ( config.directory ) { - if ( chdir(zeek::supervised_node->directory->data()) ) + if ( chdir(config.directory->data()) ) { fprintf(stderr, "node '%s' failed to chdir to %s: %s\n", 
- node_name.data(), - zeek::supervised_node->directory->data(), + node_name.data(), config.directory->data(), strerror(errno)); exit(1); } } - if ( zeek::supervised_node->stderr_file ) + if ( config.stderr_file ) { - auto fd = open(zeek::supervised_node->stderr_file->data(), + auto fd = open(config.stderr_file->data(), O_WRONLY | O_CREAT | O_TRUNC | O_APPEND | O_CLOEXEC, 0600); if ( fd == -1 || dup2(fd, STDERR_FILENO) == -1 ) { fprintf(stderr, "node '%s' failed to create stderr file %s: %s\n", - node_name.data(), - zeek::supervised_node->stderr_file->data(), + node_name.data(), config.stderr_file->data(), strerror(errno)); exit(1); } } - if ( zeek::supervised_node->stdout_file ) + if ( config.stdout_file ) { - auto fd = open(zeek::supervised_node->stdout_file->data(), + auto fd = open(config.stdout_file->data(), O_WRONLY | O_CREAT | O_TRUNC | O_APPEND | O_CLOEXEC, 0600); if ( fd == -1 || dup2(fd, STDOUT_FILENO) == -1 ) { fprintf(stderr, "node '%s' failed to create stdout file %s: %s\n", - node_name.data(), - zeek::supervised_node->stdout_file->data(), + node_name.data(), config.stdout_file->data(), strerror(errno)); exit(1); } } - if ( zeek::supervised_node->cpu_affinity ) + if ( config.cpu_affinity ) { - auto res = zeek::set_affinity(*zeek::supervised_node->cpu_affinity); + auto res = zeek::set_affinity(*config.cpu_affinity); if ( ! res ) fprintf(stderr, "node '%s' failed to set CPU affinity: %s\n", @@ -385,10 +383,10 @@ static void init_supervised_node(zeek_options* options) options->filter_supervised_node_options(); - if ( zeek::supervised_node->interface ) - options->interfaces.emplace_back(*zeek::supervised_node->interface); + if ( config.interface ) + options->interfaces.emplace_back(*config.interface); - if ( ! zeek::supervised_node->cluster.empty() ) + if ( ! 
config.cluster.empty() ) { if ( setenv("CLUSTER_NODE", node_name.data(), true) == -1 ) { @@ -398,7 +396,7 @@ static void init_supervised_node(zeek_options* options) } } - for ( const auto& s : zeek::supervised_node->scripts ) + for ( const auto& s : config.scripts ) options->scripts_to_load.emplace_back(s); } @@ -974,27 +972,30 @@ int main(int argc, char** argv) if ( zeek_stem_env ) { - std::vector fd_strings; - tokenize_string(zeek_stem_env, ",", &fd_strings); + std::vector zeek_stem_nums; + tokenize_string(zeek_stem_env, ",", &zeek_stem_nums); - if ( fd_strings.size() != 4 ) + if ( zeek_stem_nums.size() != 5 ) { fprintf(stderr, "invalid ZEEK_STEM environment variable value: '%s'\n", zeek_stem_env); exit(1); } + pid_t stem_ppid = std::stoi(zeek_stem_nums[0]); int fds[4]; for ( auto i = 0; i < 4; ++i ) - fds[i] = std::stoi(fd_strings[i]); + fds[i] = std::stoi(zeek_stem_nums[i + 1]); supervisor_pipe.reset(new bro::PipePair{FD_CLOEXEC, O_NONBLOCK, fds}); - zeek::supervised_node = zeek::Supervisor::RunStem(std::move(supervisor_pipe)); + zeek::supervised_node = zeek::Supervisor::RunStem(std::move(supervisor_pipe), + stem_ppid); } else if ( options.supervisor_mode ) { supervisor_pipe.reset(new bro::PipePair{FD_CLOEXEC, O_NONBLOCK}); + auto stem_ppid = getpid(); stem_pid = fork(); if ( stem_pid == -1 ) @@ -1005,7 +1006,8 @@ int main(int argc, char** argv) } if ( stem_pid == 0 ) - zeek::supervised_node = zeek::Supervisor::RunStem(std::move(supervisor_pipe)); + zeek::supervised_node = zeek::Supervisor::RunStem(std::move(supervisor_pipe), + stem_ppid); } if ( zeek::supervised_node ) diff --git a/src/supervisor.bif b/src/supervisor.bif index 6990bf8385..dc1c14e871 100644 --- a/src/supervisor.bif +++ b/src/supervisor.bif @@ -68,11 +68,8 @@ function Supervisor::__restart%(node: string%): bool function Supervisor::__init_cluster%(%): bool %{ - if ( zeek::supervised_node && ! 
zeek::supervised_node->cluster.empty() ) - { - zeek::supervised_node->InitCluster(); - return val_mgr->GetBool(true); - } + if ( zeek::supervised_node ) + return val_mgr->GetBool(zeek::supervised_node->InitCluster()); return val_mgr->GetBool(false); %} From dbca14e1fcef7e79b3e5d383c693d0fc337115cf Mon Sep 17 00:00:00 2001 From: Jon Siwek Date: Wed, 15 Jan 2020 15:27:53 -0800 Subject: [PATCH 31/76] Use a timer to check for death of supervised node's parent --- src/Net.cc | 11 ----------- src/Supervisor.cc | 25 +++++++++++++++++++++++++ src/Supervisor.h | 13 +++++++++++++ src/Timer.cc | 1 + src/Timer.h | 1 + src/main.cc | 3 +++ 6 files changed, 43 insertions(+), 11 deletions(-) diff --git a/src/Net.cc b/src/Net.cc index 481804ce4c..8c7e976bbc 100644 --- a/src/Net.cc +++ b/src/Net.cc @@ -290,17 +290,6 @@ void net_run() while ( iosource_mgr->Size() || (BifConst::exit_only_after_terminate && ! terminating) ) { - // Note: only simple + portable way of detecting loss of parent - // process seems to be polling for change in PPID. There's platform - // specific ways if we do end up needing something more responsive - // and/or have to avoid overhead of polling, but maybe not worth - // the additional complexity: - // Linux: prctl(PR_SET_PDEATHSIG, ...) 
- // FreeBSD: procctl(PROC_PDEATHSIG_CTL) - // TODO: make this a proper timer - if ( zeek::supervised_node && zeek::supervised_node->parent_pid != getppid() ) - zeek_terminate_loop("supervised cluster node was orphaned"); - double ts; iosource::IOSource* src = iosource_mgr->FindSoonest(&ts); diff --git a/src/Supervisor.cc b/src/Supervisor.cc index 4899a65f8c..21b88279e5 100644 --- a/src/Supervisor.cc +++ b/src/Supervisor.cc @@ -13,6 +13,7 @@ #include "Reporter.h" #include "DebugLogger.h" #include "Val.h" +#include "Net.h" #include "NetVar.h" #include "zeek-config.h" #include "util.h" @@ -124,6 +125,29 @@ static std::string make_create_message(const Supervisor::NodeConfig& node) return fmt("create %s %s", node.name.data(), json_str.data()); } +ParentProcessCheckTimer::ParentProcessCheckTimer(double t, double arg_interval) + : Timer(t, TIMER_PPID_CHECK), interval(arg_interval) + { + } + +void ParentProcessCheckTimer::Dispatch(double t, int is_expire) + { + // Note: only simple + portable way of detecting loss of parent + // process seems to be polling for change in PPID. There's platform + // specific ways if we do end up needing something more responsive + // and/or have to avoid overhead of polling, but maybe not worth + // the additional complexity: + // Linux: prctl(PR_SET_PDEATHSIG, ...) + // FreeBSD: procctl(PROC_PDEATHSIG_CTL) + // Also note the Stem process has its own polling loop with similar logic. + if ( zeek::supervised_node->parent_pid != getppid() ) + zeek_terminate_loop("supervised node was orphaned"); + + if ( ! is_expire ) + timer_mgr->Add(new ParentProcessCheckTimer(network_time + interval, + interval)); + } + Supervisor::Supervisor(Supervisor::Config cfg, std::unique_ptr pipe, pid_t arg_stem_pid) @@ -714,6 +738,7 @@ std::optional Stem::Poll() // the additional complexity: // Linux: prctl(PR_SET_PDEATHSIG, ...) // FreeBSD: procctl(PROC_PDEATHSIG_CTL) + // Also note the similar polling methodology in ParentProcessCheckTimer. 
DBG_STEM("Stem suicide"); Shutdown(13); } diff --git a/src/Supervisor.h b/src/Supervisor.h index a8a5429f87..100496d056 100644 --- a/src/Supervisor.h +++ b/src/Supervisor.h @@ -12,6 +12,7 @@ #include #include "iosource/IOSource.h" +#include "Timer.h" #include "Pipe.h" #include "Flare.h" #include "NetVar.h" @@ -19,6 +20,18 @@ namespace zeek { +class ParentProcessCheckTimer : public Timer { +public: + + ParentProcessCheckTimer(double t, double arg_interval); + + void Dispatch(double t, int is_expire) override; + +protected: + + double interval; +}; + class Supervisor : public iosource::IOSource { public: diff --git a/src/Timer.cc b/src/Timer.cc index 1138deec79..40ac0696f4 100644 --- a/src/Timer.cc +++ b/src/Timer.cc @@ -37,6 +37,7 @@ const char* TimerNames[] = { "TCPConnectionPartialClose", "TCPConnectionResetTimer", "TriggerTimer", + "ParentProcessIDCheck", "TimerMgrExpireTimer", }; diff --git a/src/Timer.h b/src/Timer.h index 5eb0aadec8..ba3a98f61e 100644 --- a/src/Timer.h +++ b/src/Timer.h @@ -41,6 +41,7 @@ enum TimerType { TIMER_TCP_PARTIAL_CLOSE, TIMER_TCP_RESET, TIMER_TRIGGER, + TIMER_PPID_CHECK, TIMER_TIMERMGR_EXPIRE, }; const int NUM_TIMER_TYPES = int(TIMER_TIMERMGR_EXPIRE) + 1; diff --git a/src/main.cc b/src/main.cc index 6cd22b5ad2..7d7f763f62 100644 --- a/src/main.cc +++ b/src/main.cc @@ -1481,6 +1481,9 @@ int main(int argc, char** argv) #endif + if ( zeek::supervised_node ) + timer_mgr->Add(new zeek::ParentProcessCheckTimer(1, 1)); + double time_net_start = current_time(true);; uint64_t mem_net_start_total; From 38cd56a3dba076c625045c2b433b5f956ed118c9 Mon Sep 17 00:00:00 2001 From: Jon Siwek Date: Thu, 16 Jan 2020 13:07:12 -0800 Subject: [PATCH 32/76] Improve normalize_path() util function It didn't always properly handle ".." when the preceding path component was also the first component. 
--- src/util.cc | 45 ++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 42 insertions(+), 3 deletions(-) diff --git a/src/util.cc b/src/util.cc index 4e597de07f..fa03279f62 100644 --- a/src/util.cc +++ b/src/util.cc @@ -1472,6 +1472,25 @@ TEST_CASE("util normalize_path") CHECK(normalize_path("/1/./2/3") == "/1/2/3"); CHECK(normalize_path("/1/2/../3") == "/1/3"); CHECK(normalize_path("1/2/3/") == "1/2/3"); + CHECK(normalize_path("1/2//3///") == "1/2/3"); + CHECK(normalize_path("~/zeek/testing") == "~/zeek/testing"); + CHECK(normalize_path("~jon/zeek/testing") == "~jon/zeek/testing"); + CHECK(normalize_path("~jon/./zeek/testing") == "~jon/zeek/testing"); + CHECK(normalize_path("~/zeek/testing/../././.") == "~/zeek"); + CHECK(normalize_path("./zeek") == "./zeek"); + CHECK(normalize_path("../zeek") == "../zeek"); + CHECK(normalize_path("../zeek/testing/..") == "../zeek"); + CHECK(normalize_path("./zeek/..") == "."); + CHECK(normalize_path("./zeek/../..") == ".."); + CHECK(normalize_path("./zeek/../../..") == "../.."); + CHECK(normalize_path("./..") == ".."); + CHECK(normalize_path("../..") == "../.."); + CHECK(normalize_path("/..") == "/.."); + CHECK(normalize_path("~/..") == "~/.."); + CHECK(normalize_path("/../..") == "/../.."); + CHECK(normalize_path("~/../..") == "~/../.."); + CHECK(normalize_path("zeek/..") == ""); + CHECK(normalize_path("zeek/../..") == ".."); } string normalize_path(const string& path) @@ -1493,10 +1512,30 @@ string normalize_path(const string& path) if ( *it == "." && it != components.begin() ) final_components.pop_back(); - else if ( *it == ".." && final_components[0] != ".." ) + else if ( *it == ".." ) { - final_components.pop_back(); - final_components.pop_back(); + auto cur_idx = final_components.size() - 1; + + if ( cur_idx != 0 ) + { + auto last_idx = cur_idx - 1; + auto& last_component = final_components[last_idx]; + + if ( last_component == "/" || last_component == "~" || + last_component == ".." 
) + continue; + + if ( last_component == "." ) + { + last_component = ".."; + final_components.pop_back(); + } + else + { + final_components.pop_back(); + final_components.pop_back(); + } + } } } From 8a145ee1a2a015916de3a94ec0dd1039ebdc563f Mon Sep 17 00:00:00 2001 From: Jon Siwek Date: Thu, 16 Jan 2020 13:11:04 -0800 Subject: [PATCH 33/76] Fix supervised node inheritance of command-line script paths They're now converted to absolute paths in the argument parsing phase such that if a supervised node switches working directory, it can still load the referenced script. --- src/main.cc | 53 ++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 52 insertions(+), 1 deletion(-) diff --git a/src/main.cc b/src/main.cc index 7d7f763f62..63ba0c62d3 100644 --- a/src/main.cc +++ b/src/main.cc @@ -681,6 +681,57 @@ static zeek_options parse_cmdline(int argc, char** argv) rval.scripts_to_load.emplace_back(zargs[optind++]); } + auto canonify_script_path = [](std::string* path) + { + if ( path->empty() ) + return; + + *path = normalize_path(*path); + + if ( (*path)[0] == '/' || (*path)[0] == '~' ) + // Absolute path + return; + + if ( (*path)[0] != '.' ) + { + // Look up file in ZEEKPATH + auto res = find_script_file(*path, bro_path()); + + if ( res.empty() ) + { + fprintf(stderr, "failed to locate script: %s\n", path->data()); + exit(1); + } + + *path = res; + + if ( (*path)[0] == '/' || (*path)[0] == '~' ) + // Now an absolute path + return; + } + + // Need to translate relative path to absolute. + char cwd[PATH_MAX]; + + if ( ! getcwd(cwd, sizeof(cwd)) ) + { + fprintf(stderr, "failed to get current directory: %s\n", + strerror(errno)); + exit(1); + } + + *path = std::string(cwd) + "/" + *path; + }; + + if ( rval.supervisor_mode ) + { + // Translate any relative paths supplied to supervisor into absolute + // paths for use by supervised nodes since they have the option to + // operate out of a different working directory. 
+ for ( auto& s : rval.scripts_to_load ) + canonify_script_path(&s); + } + return rval; } @@ -900,7 +951,7 @@ static std::string get_exe_path(const std::string& invocation) if ( invocation.empty() ) return ""; - if ( invocation[0] == '/' ) + if ( invocation[0] == '/' || invocation[0] == '~' ) // Absolute path return invocation; From 21c75b46eb815e2469d73b4017674d9b8a13be3d Mon Sep 17 00:00:00 2001 From: Jon Siwek Date: Thu, 16 Jan 2020 14:23:08 -0800 Subject: [PATCH 34/76] Improve logging of supervised node errors Now getting sent through standard Reporter framework in the Supervisor process. --- src/Supervisor.cc | 70 ++++++++++++++++++++++++++++------------------- 1 file changed, 42 insertions(+), 28 deletions(-) diff --git a/src/Supervisor.cc b/src/Supervisor.cc index 21b88279e5..f2f48f120f 100644 --- a/src/Supervisor.cc +++ b/src/Supervisor.cc @@ -62,6 +62,8 @@ struct Stem { void ReportStatus(const Supervisor::Node& node) const; + void Log(std::string_view type, const char* format, va_list args) const; + void LogDebug(const char* format, ...) const __attribute__((format(printf, 2, 3))); void LogError(const char* format, ...) const __attribute__((format(printf, 2, 3))); @@ -395,6 +397,12 @@ size_t Supervisor::ProcessMessages() { // Already logged the unparsed message above. 
} + else if ( type == "error" ) + { + msg_tokens.erase(msg_tokens.begin()); + auto err_msg = implode_string_vector(msg_tokens, " "); + reporter->Error("%s", err_msg.data()); + } else reporter->Error("Supervisor got unknown msg: %s", msg.data()); } @@ -457,7 +465,7 @@ bool Stem::Wait(Supervisor::Node* node, int options) const if ( res == -1 ) { - LogError("Stem failed to get node exit status %s (%d): %s", + LogError("Stem failed to get node exit status '%s' (PID %d): %s", node->Name().data(), node->pid, strerror(errno)); return false; } @@ -465,17 +473,25 @@ bool Stem::Wait(Supervisor::Node* node, int options) const if ( WIFEXITED(status) ) { node->exit_status = WEXITSTATUS(status); - DBG_STEM("node '%s' exited with status %d", - node->Name().data(), node->exit_status); + DBG_STEM("node '%s' (PID %d) exited with status %d", + node->Name().data(), node->pid, node->exit_status); + + if ( ! shutting_down ) + LogError("Supervised node '%s' (PID %d) exited prematurely with status %d", + node->Name().data(), node->pid, node->exit_status); } else if ( WIFSIGNALED(status) ) { node->signal_number = WTERMSIG(status); - DBG_STEM("node '%s' terminated by signal %d", - node->Name().data(), node->signal_number); + DBG_STEM("node '%s' (PID %d) terminated by signal %d", + node->Name().data(), node->pid, node->signal_number); + + if ( ! 
shutting_down ) + LogError("Supervised node '%s' (PID %d) terminated prematurely by signal %d", + node->Name().data(), node->pid, node->signal_number); } else - LogError("Stem failed to get node exit status %s (%d)", + LogError("Stem failed to get node exit status '%s' (PID %d)", node->Name().data(), node->pid); node->pid = 0; @@ -487,8 +503,8 @@ void Stem::KillNode(const Supervisor::Node& node, int signal) const auto kill_res = kill(node.pid, signal); if ( kill_res == -1 ) - LogError("Failed to send signal to node %s: %s", - node.Name().data(), strerror(errno)); + LogError("Failed to send signal to node '%s' (PID %d): %s", + node.Name().data(), node.pid, strerror(errno)); } void Stem::Destroy(Supervisor::Node* node) const @@ -506,7 +522,7 @@ void Stem::Destroy(Supervisor::Node* node) const if ( Wait(node, WNOHANG) ) break; - DBG_STEM("Stem waiting to destroy node: %s (%d)", + DBG_STEM("Stem waiting to destroy node: %s (PID %d)", node->Name().data(), node->pid); sleep(kill_delay); } @@ -551,6 +567,8 @@ std::optional Stem::Revive() if ( sn ) return sn; + LogError("Supervised node '%s' (PID %d) revived after premature exit", + node.Name().data(), node.pid); ReportStatus(node); } @@ -582,7 +600,7 @@ std::optional Stem::Spawn(Supervisor::Node* node) node->pid = node_pid; node->spawn_time = std::chrono::steady_clock::now(); - DBG_STEM("Stem spawned node: %s (%d)", node->Name().data(), node->pid); + DBG_STEM("Stem spawned node: %s (PID %d)", node->Name().data(), node->pid); return {}; } @@ -605,6 +623,7 @@ void Stem::KillNodes(int signal) const void Stem::Shutdown(int exit_code) { + shutting_down = true; constexpr auto max_term_attempts = 13; constexpr auto kill_delay = 2; auto kill_attempts = 0; @@ -654,12 +673,9 @@ void Stem::ReportStatus(const Supervisor::Node& node) const safe_write(pipe->OutFD(), msg.data(), msg.size() + 1); } -void Stem::LogDebug(const char* format, ...) 
const +void Stem::Log(std::string_view type, const char* format, va_list args) const { - va_list args; - va_start(args, format); auto raw_msg = fmt(format, args); - va_end(args); if ( getenv("ZEEK_DEBUG_STEM_STDERR") ) { @@ -668,28 +684,26 @@ void Stem::LogDebug(const char* format, ...) const return; } - std::string msg = "debug "; + std::string msg{type.data(), type.size()}; + msg += " "; msg += raw_msg; safe_write(pipe->OutFD(), msg.data(), msg.size() + 1); } +void Stem::LogDebug(const char* format, ...) const + { + va_list args; + va_start(args, format); + Log("debug", format, args); + va_end(args); + } + void Stem::LogError(const char* format, ...) const { va_list args; va_start(args, format); - std::string msg = fmt(format, args); + Log("error", format, args); va_end(args); - - fprintf(stderr, "%s\n", msg.data()); - - #ifdef DEBUG - if ( getenv("ZEEK_DEBUG_STEM_STDERR") ) - // Essentially already emitted above. - return; - - // Useful to also insert the error message into the debug log. 
- LogDebug("%s", msg.data()); - #endif } std::optional Stem::Run() @@ -805,7 +819,7 @@ std::optional Stem::Poll() if ( sn ) return sn; - DBG_STEM("Stem created node: %s (%d)", node.Name().data(), node.pid); + DBG_STEM("Stem created node: %s (PID %d)", node.Name().data(), node.pid); ReportStatus(node); } else if ( cmd == "destroy" ) From 1972190b89d7f72a5e18bbe37037966d961025d9 Mon Sep 17 00:00:00 2001 From: Jon Siwek Date: Thu, 16 Jan 2020 19:21:53 -0800 Subject: [PATCH 35/76] Add supervisor btests --- scripts/base/frameworks/supervisor/api.zeek | 2 +- src/Supervisor.cc | 25 +++--- src/Supervisor.h | 1 + .../zeek.logger-1.stdout | 2 + .../zeek.manager.stdout | 2 + .../zeek.proxy-1.stdout | 2 + .../zeek.supervisor.out | 3 + .../zeek.worker-1.stdout | 2 + .../zeek.qux.node.out | 2 + .../zeek.supervisor.out | 3 + .../zeek.qux.grault.stderr | 3 + .../zeek.qux.grault.stdout | 2 + .../zeek.supervisor.out | 3 + .../supervisor.config-scripts/zeek.node.out | 3 + .../zeek.supervisor.out | 3 + .../Baseline/supervisor.create/zeek.node.out | 2 + .../supervisor.create/zeek.supervisor.out | 2 + .../Baseline/supervisor.destroy/zeek.node.out | 2 + .../supervisor.destroy/zeek.supervisor.out | 3 + .../Baseline/supervisor.restart/zeek..stdout | 3 + .../Baseline/supervisor.status/zeek..stdout | 1 + testing/btest/btest.cfg | 2 +- testing/btest/supervisor/config-cluster.zeek | 89 +++++++++++++++++++ .../btest/supervisor/config-directory.zeek | 60 +++++++++++++ .../supervisor/config-output-redirect.zeek | 66 ++++++++++++++ testing/btest/supervisor/config-scripts.zeek | 70 +++++++++++++++ testing/btest/supervisor/create.zeek | 42 +++++++++ testing/btest/supervisor/destroy.zeek | 60 +++++++++++++ testing/btest/supervisor/restart.zeek | 66 ++++++++++++++ testing/btest/supervisor/status.zeek | 34 +++++++ 30 files changed, 546 insertions(+), 14 deletions(-) create mode 100644 testing/btest/Baseline/supervisor.config-cluster/zeek.logger-1.stdout create mode 100644 
testing/btest/Baseline/supervisor.config-cluster/zeek.manager.stdout create mode 100644 testing/btest/Baseline/supervisor.config-cluster/zeek.proxy-1.stdout create mode 100644 testing/btest/Baseline/supervisor.config-cluster/zeek.supervisor.out create mode 100644 testing/btest/Baseline/supervisor.config-cluster/zeek.worker-1.stdout create mode 100644 testing/btest/Baseline/supervisor.config-directory/zeek.qux.node.out create mode 100644 testing/btest/Baseline/supervisor.config-directory/zeek.supervisor.out create mode 100644 testing/btest/Baseline/supervisor.config-output-redirect/zeek.qux.grault.stderr create mode 100644 testing/btest/Baseline/supervisor.config-output-redirect/zeek.qux.grault.stdout create mode 100644 testing/btest/Baseline/supervisor.config-output-redirect/zeek.supervisor.out create mode 100644 testing/btest/Baseline/supervisor.config-scripts/zeek.node.out create mode 100644 testing/btest/Baseline/supervisor.config-scripts/zeek.supervisor.out create mode 100644 testing/btest/Baseline/supervisor.create/zeek.node.out create mode 100644 testing/btest/Baseline/supervisor.create/zeek.supervisor.out create mode 100644 testing/btest/Baseline/supervisor.destroy/zeek.node.out create mode 100644 testing/btest/Baseline/supervisor.destroy/zeek.supervisor.out create mode 100644 testing/btest/Baseline/supervisor.restart/zeek..stdout create mode 100644 testing/btest/Baseline/supervisor.status/zeek..stdout create mode 100644 testing/btest/supervisor/config-cluster.zeek create mode 100644 testing/btest/supervisor/config-directory.zeek create mode 100644 testing/btest/supervisor/config-output-redirect.zeek create mode 100644 testing/btest/supervisor/config-scripts.zeek create mode 100644 testing/btest/supervisor/create.zeek create mode 100644 testing/btest/supervisor/destroy.zeek create mode 100644 testing/btest/supervisor/restart.zeek create mode 100644 testing/btest/supervisor/status.zeek diff --git a/scripts/base/frameworks/supervisor/api.zeek 
b/scripts/base/frameworks/supervisor/api.zeek index a7593b8f3e..22e73cce81 100644 --- a/scripts/base/frameworks/supervisor/api.zeek +++ b/scripts/base/frameworks/supervisor/api.zeek @@ -32,7 +32,7 @@ export { type NodeStatus: record { node: NodeConfig; - pid: count; + pid: int &optional; }; type Status: record { diff --git a/src/Supervisor.cc b/src/Supervisor.cc index f2f48f120f..7ae6a81e4e 100644 --- a/src/Supervisor.cc +++ b/src/Supervisor.cc @@ -50,9 +50,9 @@ struct Stem { int AliveNodeCount() const; - void KillNodes(int signal) const; + void KillNodes(int signal); - void KillNode(const Supervisor::Node& node, int signal) const; + void KillNode(Supervisor::Node* node, int signal) const; void Destroy(Supervisor::Node* node) const; @@ -476,7 +476,7 @@ bool Stem::Wait(Supervisor::Node* node, int options) const DBG_STEM("node '%s' (PID %d) exited with status %d", node->Name().data(), node->pid, node->exit_status); - if ( ! shutting_down ) + if ( ! node->killed ) LogError("Supervised node '%s' (PID %d) exited prematurely with status %d", node->Name().data(), node->pid, node->exit_status); } @@ -486,7 +486,7 @@ bool Stem::Wait(Supervisor::Node* node, int options) const DBG_STEM("node '%s' (PID %d) terminated by signal %d", node->Name().data(), node->pid, node->signal_number); - if ( ! shutting_down ) + if ( ! 
node->killed ) LogError("Supervised node '%s' (PID %d) terminated prematurely by signal %d", node->Name().data(), node->pid, node->signal_number); } @@ -498,13 +498,14 @@ bool Stem::Wait(Supervisor::Node* node, int options) const return true; } -void Stem::KillNode(const Supervisor::Node& node, int signal) const +void Stem::KillNode(Supervisor::Node* node, int signal) const { - auto kill_res = kill(node.pid, signal); + node->killed = true; + auto kill_res = kill(node->pid, signal); if ( kill_res == -1 ) LogError("Failed to send signal to node '%s' (PID %d): %s", - node.Name().data(), node.pid, strerror(errno)); + node->Name().data(), node->pid, strerror(errno)); } void Stem::Destroy(Supervisor::Node* node) const @@ -516,7 +517,7 @@ void Stem::Destroy(Supervisor::Node* node) const for ( ; ; ) { auto sig = kill_attempts++ < max_term_attempts ? SIGTERM : SIGKILL; - KillNode(*node, sig); + KillNode(node, sig); usleep(10); if ( Wait(node, WNOHANG) ) @@ -615,10 +616,10 @@ int Stem::AliveNodeCount() const return rval; } -void Stem::KillNodes(int signal) const +void Stem::KillNodes(int signal) { - for ( const auto& n : nodes ) - KillNode(n.second, signal); + for ( auto& n : nodes ) + KillNode(&n.second, signal); } void Stem::Shutdown(int exit_code) @@ -1061,7 +1062,7 @@ IntrusivePtr Supervisor::Node::ToRecord() const rval->Assign(rt->FieldOffset("node"), config.ToRecord().detach()); if ( pid ) - rval->Assign(rt->FieldOffset("pid"), val_mgr->GetCount(pid)); + rval->Assign(rt->FieldOffset("pid"), val_mgr->GetInt(pid)); return rval; } diff --git a/src/Supervisor.h b/src/Supervisor.h index 100496d056..bb27ab6a4c 100644 --- a/src/Supervisor.h +++ b/src/Supervisor.h @@ -81,6 +81,7 @@ public: NodeConfig config; pid_t pid = 0; + bool killed = false; int exit_status = 0; int signal_number = 0; int revival_attempts = 0; diff --git a/testing/btest/Baseline/supervisor.config-cluster/zeek.logger-1.stdout b/testing/btest/Baseline/supervisor.config-cluster/zeek.logger-1.stdout new file 
mode 100644 index 0000000000..722fe53d14 --- /dev/null +++ b/testing/btest/Baseline/supervisor.config-cluster/zeek.logger-1.stdout @@ -0,0 +1,2 @@ +supervised node zeek_init(), logger-1, Cluster::LOGGER +supervised node zeek_done(), logger-1 diff --git a/testing/btest/Baseline/supervisor.config-cluster/zeek.manager.stdout b/testing/btest/Baseline/supervisor.config-cluster/zeek.manager.stdout new file mode 100644 index 0000000000..e059737e0d --- /dev/null +++ b/testing/btest/Baseline/supervisor.config-cluster/zeek.manager.stdout @@ -0,0 +1,2 @@ +supervised node zeek_init(), manager, Cluster::MANAGER +supervised node zeek_done(), manager diff --git a/testing/btest/Baseline/supervisor.config-cluster/zeek.proxy-1.stdout b/testing/btest/Baseline/supervisor.config-cluster/zeek.proxy-1.stdout new file mode 100644 index 0000000000..5c5b4e3ca5 --- /dev/null +++ b/testing/btest/Baseline/supervisor.config-cluster/zeek.proxy-1.stdout @@ -0,0 +1,2 @@ +supervised node zeek_init(), proxy-1, Cluster::PROXY +supervised node zeek_done(), proxy-1 diff --git a/testing/btest/Baseline/supervisor.config-cluster/zeek.supervisor.out b/testing/btest/Baseline/supervisor.config-cluster/zeek.supervisor.out new file mode 100644 index 0000000000..545693ae5f --- /dev/null +++ b/testing/btest/Baseline/supervisor.config-cluster/zeek.supervisor.out @@ -0,0 +1,3 @@ +supervisor zeek_init() +shutting down +supervisor zeek_done() diff --git a/testing/btest/Baseline/supervisor.config-cluster/zeek.worker-1.stdout b/testing/btest/Baseline/supervisor.config-cluster/zeek.worker-1.stdout new file mode 100644 index 0000000000..0a1c84a25c --- /dev/null +++ b/testing/btest/Baseline/supervisor.config-cluster/zeek.worker-1.stdout @@ -0,0 +1,2 @@ +supervised node zeek_init(), worker-1, Cluster::WORKER +supervised node zeek_done(), worker-1 diff --git a/testing/btest/Baseline/supervisor.config-directory/zeek.qux.node.out b/testing/btest/Baseline/supervisor.config-directory/zeek.qux.node.out new file mode 100644 
index 0000000000..101b306cf0 --- /dev/null +++ b/testing/btest/Baseline/supervisor.config-directory/zeek.qux.node.out @@ -0,0 +1,2 @@ +supervised node zeek_init() +supervised node zeek_done() diff --git a/testing/btest/Baseline/supervisor.config-directory/zeek.supervisor.out b/testing/btest/Baseline/supervisor.config-directory/zeek.supervisor.out new file mode 100644 index 0000000000..295c7211d6 --- /dev/null +++ b/testing/btest/Baseline/supervisor.config-directory/zeek.supervisor.out @@ -0,0 +1,3 @@ +supervisor zeek_init() +destroying node +supervisor zeek_done() diff --git a/testing/btest/Baseline/supervisor.config-output-redirect/zeek.qux.grault.stderr b/testing/btest/Baseline/supervisor.config-output-redirect/zeek.qux.grault.stderr new file mode 100644 index 0000000000..30a9fb1fde --- /dev/null +++ b/testing/btest/Baseline/supervisor.config-output-redirect/zeek.qux.grault.stderr @@ -0,0 +1,3 @@ +(stderr) supervised node zeek_init() +received termination signal +(stderr) supervised node zeek_done() diff --git a/testing/btest/Baseline/supervisor.config-output-redirect/zeek.qux.grault.stdout b/testing/btest/Baseline/supervisor.config-output-redirect/zeek.qux.grault.stdout new file mode 100644 index 0000000000..7b21bfad60 --- /dev/null +++ b/testing/btest/Baseline/supervisor.config-output-redirect/zeek.qux.grault.stdout @@ -0,0 +1,2 @@ +(stdout) supervised node zeek_init() +(stdout) supervised node zeek_done() diff --git a/testing/btest/Baseline/supervisor.config-output-redirect/zeek.supervisor.out b/testing/btest/Baseline/supervisor.config-output-redirect/zeek.supervisor.out new file mode 100644 index 0000000000..295c7211d6 --- /dev/null +++ b/testing/btest/Baseline/supervisor.config-output-redirect/zeek.supervisor.out @@ -0,0 +1,3 @@ +supervisor zeek_init() +destroying node +supervisor zeek_done() diff --git a/testing/btest/Baseline/supervisor.config-scripts/zeek.node.out b/testing/btest/Baseline/supervisor.config-scripts/zeek.node.out new file mode 100644 index 
0000000000..64685fcf36 --- /dev/null +++ b/testing/btest/Baseline/supervisor.config-scripts/zeek.node.out @@ -0,0 +1,3 @@ +supervised node zeek_init() +supervised node loaded qux.zeek +supervised node zeek_done() diff --git a/testing/btest/Baseline/supervisor.config-scripts/zeek.supervisor.out b/testing/btest/Baseline/supervisor.config-scripts/zeek.supervisor.out new file mode 100644 index 0000000000..295c7211d6 --- /dev/null +++ b/testing/btest/Baseline/supervisor.config-scripts/zeek.supervisor.out @@ -0,0 +1,3 @@ +supervisor zeek_init() +destroying node +supervisor zeek_done() diff --git a/testing/btest/Baseline/supervisor.create/zeek.node.out b/testing/btest/Baseline/supervisor.create/zeek.node.out new file mode 100644 index 0000000000..101b306cf0 --- /dev/null +++ b/testing/btest/Baseline/supervisor.create/zeek.node.out @@ -0,0 +1,2 @@ +supervised node zeek_init() +supervised node zeek_done() diff --git a/testing/btest/Baseline/supervisor.create/zeek.supervisor.out b/testing/btest/Baseline/supervisor.create/zeek.supervisor.out new file mode 100644 index 0000000000..01bdfe9332 --- /dev/null +++ b/testing/btest/Baseline/supervisor.create/zeek.supervisor.out @@ -0,0 +1,2 @@ +supervisor zeek_init() +supervisor zeek_done() diff --git a/testing/btest/Baseline/supervisor.destroy/zeek.node.out b/testing/btest/Baseline/supervisor.destroy/zeek.node.out new file mode 100644 index 0000000000..101b306cf0 --- /dev/null +++ b/testing/btest/Baseline/supervisor.destroy/zeek.node.out @@ -0,0 +1,2 @@ +supervised node zeek_init() +supervised node zeek_done() diff --git a/testing/btest/Baseline/supervisor.destroy/zeek.supervisor.out b/testing/btest/Baseline/supervisor.destroy/zeek.supervisor.out new file mode 100644 index 0000000000..295c7211d6 --- /dev/null +++ b/testing/btest/Baseline/supervisor.destroy/zeek.supervisor.out @@ -0,0 +1,3 @@ +supervisor zeek_init() +destroying node +supervisor zeek_done() diff --git a/testing/btest/Baseline/supervisor.restart/zeek..stdout 
b/testing/btest/Baseline/supervisor.restart/zeek..stdout new file mode 100644 index 0000000000..15f6aa3c98 --- /dev/null +++ b/testing/btest/Baseline/supervisor.restart/zeek..stdout @@ -0,0 +1,3 @@ +got supervised node status, grault +got supervised node status, grault +got supervised node status, grault diff --git a/testing/btest/Baseline/supervisor.status/zeek..stdout b/testing/btest/Baseline/supervisor.status/zeek..stdout new file mode 100644 index 0000000000..59a36a37e0 --- /dev/null +++ b/testing/btest/Baseline/supervisor.status/zeek..stdout @@ -0,0 +1 @@ +got supervised node status, grault diff --git a/testing/btest/btest.cfg b/testing/btest/btest.cfg index ef56fd2afa..258a7b04ae 100644 --- a/testing/btest/btest.cfg +++ b/testing/btest/btest.cfg @@ -1,5 +1,5 @@ [btest] -TestDirs = doc bifs language core scripts coverage signatures plugins broker +TestDirs = doc bifs language core scripts coverage signatures plugins broker supervisor TmpDir = %(testbase)s/.tmp BaselineDir = %(testbase)s/Baseline IgnoreDirs = .svn CVS .tmp diff --git a/testing/btest/supervisor/config-cluster.zeek b/testing/btest/supervisor/config-cluster.zeek new file mode 100644 index 0000000000..cea02e2020 --- /dev/null +++ b/testing/btest/supervisor/config-cluster.zeek @@ -0,0 +1,89 @@ +# @TEST-PORT: SUPERVISOR_PORT +# @TEST-PORT: MANAGER_PORT +# @TEST-PORT: LOGGER_PORT +# @TEST-PORT: PROXY_PORT +# @TEST-PORT: WORKER_PORT +# @TEST-EXEC: btest-bg-run zeek zeek -j -b %INPUT +# @TEST-EXEC: btest-bg-wait 20 +# @TEST-EXEC: btest-diff zeek/supervisor.out +# @TEST-EXEC: btest-diff zeek/manager/stdout +# @TEST-EXEC: btest-diff zeek/logger-1/stdout +# @TEST-EXEC: btest-diff zeek/worker-1/stdout +# @TEST-EXEC: btest-diff zeek/proxy-1/stdout + +@load base/frameworks/cluster + +# So the supervised node doesn't terminate right away. 
+redef exit_only_after_terminate=T; + +global supervisor_output_file: file; +global topic = "test-topic"; +global peer_count = 0; + +event shutdown() + { + print supervisor_output_file, "shutting down"; + terminate(); + } + +event zeek_init() + { + if ( Supervisor::is_supervisor() ) + { + Broker::subscribe(topic); + Broker::listen("127.0.0.1", to_port(getenv("SUPERVISOR_PORT"))); + supervisor_output_file = open("supervisor.out"); + print supervisor_output_file, "supervisor zeek_init()"; + + local cluster: table[string] of Supervisor::ClusterEndpoint; + cluster["manager"] = [$role=Supervisor::MANAGER, $host=127.0.0.1, + $p=to_port(getenv("MANAGER_PORT"))]; + cluster["logger-1"] = [$role=Supervisor::LOGGER, $host=127.0.0.1, + $p=to_port(getenv("LOGGER_PORT"))]; + cluster["proxy-1"] = [$role=Supervisor::PROXY, $host=127.0.0.1, + $p=to_port(getenv("PROXY_PORT"))]; + cluster["worker-1"] = [$role=Supervisor::WORKER, $host=127.0.0.1, + $p=to_port(getenv("WORKER_PORT"))]; + + for ( n, ep in cluster ) + { + local sn = Supervisor::NodeConfig($name = n); + sn$cluster = cluster; + sn$directory = n; + sn$stdout_file = "stdout"; + sn$stderr_file = "stderr"; + local res = Supervisor::create(sn); + + if ( res != "" ) + print fmt("failed to create node %s: %s", n, res); + } + } + else + { + Broker::peer("127.0.0.1", to_port(getenv("SUPERVISOR_PORT"))); + print "supervised node zeek_init()", Cluster::node, Cluster::local_node_type(); + } + } + +event Broker::peer_added(endpoint: Broker::EndpointInfo, msg: string) + { + ++peer_count; + + if ( Supervisor::is_supervised() ) + { + if ( Cluster::node == "manager" && peer_count == 4 ) + Broker::publish(topic, shutdown); + } + } + +event Broker::peer_lost(endpoint: Broker::EndpointInfo, msg: string) + { + } + +event zeek_done() + { + if ( Supervisor::is_supervised() ) + print "supervised node zeek_done()", Cluster::node; + else + print supervisor_output_file, "supervisor zeek_done()"; + } diff --git 
a/testing/btest/supervisor/config-directory.zeek b/testing/btest/supervisor/config-directory.zeek new file mode 100644 index 0000000000..bcf9ebd69f --- /dev/null +++ b/testing/btest/supervisor/config-directory.zeek @@ -0,0 +1,60 @@ +# @TEST-PORT: BROKER_PORT +# @TEST-EXEC: btest-bg-run zeek zeek -j -b %INPUT +# @TEST-EXEC: btest-bg-wait 20 +# @TEST-EXEC: btest-diff zeek/supervisor.out +# @TEST-EXEC: btest-diff zeek/qux/node.out + +# So the supervised node doesn't terminate right away. +redef exit_only_after_terminate=T; + +global supervisor_output_file: file; +global node_output_file: file; +global topic = "test-topic"; + +event do_destroy() + { + print supervisor_output_file, "destroying node"; + Supervisor::destroy("grault"); + } + +event zeek_init() + { + if ( Supervisor::is_supervisor() ) + { + Broker::subscribe(topic); + Broker::listen("127.0.0.1", to_port(getenv("BROKER_PORT"))); + supervisor_output_file = open("supervisor.out"); + print supervisor_output_file, "supervisor zeek_init()"; + local sn = Supervisor::NodeConfig($name="grault", $directory="qux"); + local res = Supervisor::create(sn); + + if ( res != "" ) + print supervisor_output_file, res; + } + else + { + Broker::peer("127.0.0.1", to_port(getenv("BROKER_PORT"))); + node_output_file = open("node.out"); + print node_output_file, "supervised node zeek_init()"; + } + } + +event Broker::peer_added(endpoint: Broker::EndpointInfo, msg: string) + { + if ( Supervisor::is_supervised() ) + Broker::publish(topic, do_destroy); + } + +event Broker::peer_lost(endpoint: Broker::EndpointInfo, msg: string) + { + # Should only be run by supervisor + terminate(); + } + +event zeek_done() + { + if ( Supervisor::is_supervised() ) + print node_output_file, "supervised node zeek_done()"; + else + print supervisor_output_file, "supervisor zeek_done()"; + } diff --git a/testing/btest/supervisor/config-output-redirect.zeek b/testing/btest/supervisor/config-output-redirect.zeek new file mode 100644 index 
0000000000..78a8d3d6ad --- /dev/null +++ b/testing/btest/supervisor/config-output-redirect.zeek @@ -0,0 +1,66 @@ +# @TEST-PORT: BROKER_PORT +# @TEST-EXEC: btest-bg-run zeek zeek -j -b %INPUT +# @TEST-EXEC: btest-bg-wait 20 +# @TEST-EXEC: btest-diff zeek/supervisor.out +# @TEST-EXEC: btest-diff zeek/qux/grault.stdout +# @TEST-EXEC: btest-diff zeek/qux/grault.stderr + +# So the supervised node doesn't terminate right away. +redef exit_only_after_terminate=T; + +global supervisor_output_file: file; +global topic = "test-topic"; +global stderr = open("/dev/stderr"); + +event do_destroy() + { + print supervisor_output_file, "destroying node"; + Supervisor::destroy("grault"); + } + +event zeek_init() + { + if ( Supervisor::is_supervisor() ) + { + Broker::subscribe(topic); + Broker::listen("127.0.0.1", to_port(getenv("BROKER_PORT"))); + supervisor_output_file = open("supervisor.out"); + print supervisor_output_file, "supervisor zeek_init()"; + local sn = Supervisor::NodeConfig($name="grault", $directory="qux", + $stdout_file="grault.stdout", + $stderr_file="grault.stderr"); + local res = Supervisor::create(sn); + + if ( res != "" ) + print supervisor_output_file, res; + } + else + { + Broker::peer("127.0.0.1", to_port(getenv("BROKER_PORT"))); + print "(stdout) supervised node zeek_init()"; + print stderr, "(stderr) supervised node zeek_init()"; + } + } + +event Broker::peer_added(endpoint: Broker::EndpointInfo, msg: string) + { + if ( Supervisor::is_supervised() ) + Broker::publish(topic, do_destroy); + } + +event Broker::peer_lost(endpoint: Broker::EndpointInfo, msg: string) + { + # Should only be run by supervisor + terminate(); + } + +event zeek_done() + { + if ( Supervisor::is_supervised() ) + { + print "(stdout) supervised node zeek_done()"; + print stderr, "(stderr) supervised node zeek_done()"; + } + else + print supervisor_output_file, "supervisor zeek_done()"; + } diff --git a/testing/btest/supervisor/config-scripts.zeek 
b/testing/btest/supervisor/config-scripts.zeek new file mode 100644 index 0000000000..beeaa567f5 --- /dev/null +++ b/testing/btest/supervisor/config-scripts.zeek @@ -0,0 +1,70 @@ +# @TEST-PORT: BROKER_PORT +# @TEST-EXEC: btest-bg-run zeek zeek -j -b %INPUT +# @TEST-EXEC: btest-bg-wait 20 +# @TEST-EXEC: btest-diff zeek/supervisor.out +# @TEST-EXEC: btest-diff zeek/node.out + +# So the supervised node doesn't terminate right away. +redef exit_only_after_terminate=T; + +global supervisor_output_file: file; +global node_output_file: file; +global topic = "test-topic"; + +event do_destroy() + { + print supervisor_output_file, "destroying node"; + Supervisor::destroy("grault"); + } + +event zeek_init() + { + if ( Supervisor::is_supervisor() ) + { + Broker::subscribe(topic); + Broker::listen("127.0.0.1", to_port(getenv("BROKER_PORT"))); + supervisor_output_file = open("supervisor.out"); + print supervisor_output_file, "supervisor zeek_init()"; + local sn = Supervisor::NodeConfig($name="grault", + $scripts=vector("../qux.zeek")); + local res = Supervisor::create(sn); + + if ( res != "" ) + print supervisor_output_file, res; + } + else + { + Broker::peer("127.0.0.1", to_port(getenv("BROKER_PORT"))); + node_output_file = open("node.out"); + print node_output_file, "supervised node zeek_init()"; + } + } + +event Broker::peer_added(endpoint: Broker::EndpointInfo, msg: string) + { + if ( Supervisor::is_supervised() ) + Broker::publish(topic, do_destroy); + } + +event Broker::peer_lost(endpoint: Broker::EndpointInfo, msg: string) + { + # Should only be run by supervisor + terminate(); + } + +event zeek_done() + { + if ( Supervisor::is_supervised() ) + print node_output_file, "supervised node zeek_done()"; + else + print supervisor_output_file, "supervisor zeek_done()"; + } + +@TEST-START-FILE qux.zeek + +event zeek_init() &priority=-10 + { + print node_output_file, "supervised node loaded qux.zeek"; + } + +@TEST-END-FILE diff --git a/testing/btest/supervisor/create.zeek 
b/testing/btest/supervisor/create.zeek new file mode 100644 index 0000000000..3455d63f12 --- /dev/null +++ b/testing/btest/supervisor/create.zeek @@ -0,0 +1,42 @@ +# @TEST-EXEC: btest-bg-run zeek zeek -j -b %INPUT +# @TEST-EXEC: btest-bg-wait 20 +# @TEST-EXEC: btest-diff zeek/supervisor.out +# @TEST-EXEC: btest-diff zeek/node.out + +# So the supervised node doesn't terminate right away. +redef exit_only_after_terminate=T; + +global supervisor_output_file: file; +global node_output_file: file; + +event zeek_init() + { + local pid_file = "supervisor.pid"; + + if ( Supervisor::is_supervisor() ) + { + supervisor_output_file = open("supervisor.out"); + print supervisor_output_file, "supervisor zeek_init()"; + local f = open(pid_file); + print f, getpid(); + local sn = Supervisor::NodeConfig($name="grault"); + local res = Supervisor::create(sn); + + if ( res != "" ) + print supervisor_output_file, res; + } + else + { + node_output_file = open("node.out"); + print node_output_file, "supervised node zeek_init()"; + system(fmt("kill `cat %s`", pid_file)); + } + } + +event zeek_done() + { + if ( Supervisor::is_supervised() ) + print node_output_file, "supervised node zeek_done()"; + else + print supervisor_output_file, "supervisor zeek_done()"; + } diff --git a/testing/btest/supervisor/destroy.zeek b/testing/btest/supervisor/destroy.zeek new file mode 100644 index 0000000000..233870b5dc --- /dev/null +++ b/testing/btest/supervisor/destroy.zeek @@ -0,0 +1,60 @@ +# @TEST-PORT: BROKER_PORT +# @TEST-EXEC: btest-bg-run zeek zeek -j -b %INPUT +# @TEST-EXEC: btest-bg-wait 20 +# @TEST-EXEC: btest-diff zeek/supervisor.out +# @TEST-EXEC: btest-diff zeek/node.out + +# So the supervised node doesn't terminate right away. 
+redef exit_only_after_terminate=T; + +global supervisor_output_file: file; +global node_output_file: file; +global topic = "test-topic"; + +event do_destroy() + { + print supervisor_output_file, "destroying node"; + Supervisor::destroy("grault"); + } + +event zeek_init() + { + if ( Supervisor::is_supervisor() ) + { + Broker::subscribe(topic); + Broker::listen("127.0.0.1", to_port(getenv("BROKER_PORT"))); + supervisor_output_file = open("supervisor.out"); + print supervisor_output_file, "supervisor zeek_init()"; + local sn = Supervisor::NodeConfig($name="grault"); + local res = Supervisor::create(sn); + + if ( res != "" ) + print supervisor_output_file, res; + } + else + { + Broker::peer("127.0.0.1", to_port(getenv("BROKER_PORT"))); + node_output_file = open("node.out"); + print node_output_file, "supervised node zeek_init()"; + } + } + +event Broker::peer_added(endpoint: Broker::EndpointInfo, msg: string) + { + if ( Supervisor::is_supervised() ) + Broker::publish(topic, do_destroy); + } + +event Broker::peer_lost(endpoint: Broker::EndpointInfo, msg: string) + { + # Should only be run by supervisor + terminate(); + } + +event zeek_done() + { + if ( Supervisor::is_supervised() ) + print node_output_file, "supervised node zeek_done()"; + else + print supervisor_output_file, "supervisor zeek_done()"; + } diff --git a/testing/btest/supervisor/restart.zeek b/testing/btest/supervisor/restart.zeek new file mode 100644 index 0000000000..e9ddeed97d --- /dev/null +++ b/testing/btest/supervisor/restart.zeek @@ -0,0 +1,66 @@ +# @TEST-EXEC: btest-bg-run zeek zeek -j -b %INPUT +# @TEST-EXEC: btest-bg-wait 20 +# @TEST-EXEC: btest-diff zeek/.stdout + +# So the supervised node doesn't terminate right away. +redef exit_only_after_terminate=T; + +global node_pid: int = 0; +global status_count = 0; +global check_interval = 0.1sec; + +event check_status(name: string &default="") + { + local s = Supervisor::status(name); + local ns = s$nodes["grault"]; + + if ( ! 
ns?$pid ) + { + schedule check_interval { check_status() }; + return; + } + + if ( status_count > 0 && node_pid == ns$pid ) + { + schedule check_interval { check_status() }; + return; + } + + print "got supervised node status", ns$node$name; + + node_pid = ns$pid; + ++status_count; + + if ( status_count == 1 ) + { + Supervisor::restart(); + schedule check_interval { check_status() }; + } + else if ( status_count == 2 ) + { + Supervisor::restart("grault"); + schedule check_interval { check_status("grault") }; + } + else + terminate(); + } + +event zeek_init() + { + if ( Supervisor::is_supervisor() ) + { + local sn = Supervisor::NodeConfig($name="grault"); + local res = Supervisor::create(sn); + + if ( res != "" ) + print "failed to create node", res; + + sn$name = "qux"; + res = Supervisor::create(sn); + + if ( res != "" ) + print "failed to create node", res; + + event check_status(); + } + } diff --git a/testing/btest/supervisor/status.zeek b/testing/btest/supervisor/status.zeek new file mode 100644 index 0000000000..e9ce4cdeca --- /dev/null +++ b/testing/btest/supervisor/status.zeek @@ -0,0 +1,34 @@ +# @TEST-EXEC: btest-bg-run zeek zeek -j -b %INPUT +# @TEST-EXEC: btest-bg-wait 20 +# @TEST-EXEC: btest-diff zeek/.stdout + +# So the supervised node doesn't terminate right away. +redef exit_only_after_terminate=T; + +event check_status() + { + local s = Supervisor::status(); + local ns = s$nodes["grault"]; + + if ( ! 
ns?$pid ) + schedule 0.25sec { check_status() }; + else + { + print "got supervised node status", ns$node$name; + terminate(); + } + } + +event zeek_init() + { + if ( Supervisor::is_supervisor() ) + { + local sn = Supervisor::NodeConfig($name="grault"); + local res = Supervisor::create(sn); + + if ( res != "" ) + print "failed to create node", res; + + event check_status(); + } + } From 8247c42368c5cd8f7239078ecc79226d37b34543 Mon Sep 17 00:00:00 2001 From: Jon Siwek Date: Fri, 17 Jan 2020 18:36:32 -0800 Subject: [PATCH 36/76] Add Supervisor documentation Minor additions/changes to improve API I noticed along the way --- doc | 2 +- scripts/base/frameworks/supervisor/api.zeek | 128 ++++++++- scripts/base/frameworks/supervisor/main.zeek | 10 +- src/Supervisor.cc | 10 +- src/Supervisor.h | 264 ++++++++++++++++-- src/supervisor.bif | 15 + .../zeek.logger-1.stdout | 2 +- .../zeek.manager.stdout | 2 +- .../zeek.proxy-1.stdout | 2 +- .../zeek.worker-1.stdout | 2 +- testing/btest/supervisor/config-cluster.zeek | 2 +- 11 files changed, 405 insertions(+), 34 deletions(-) diff --git a/doc b/doc index 7192dbedf3..0cb30512c5 160000 --- a/doc +++ b/doc @@ -1 +1 @@ -Subproject commit 7192dbedf3ca9ce49294057262074f0e888177f3 +Subproject commit 0cb30512c52990fcdb1e93b5219f65c9b3d18dce diff --git a/scripts/base/frameworks/supervisor/api.zeek b/scripts/base/frameworks/supervisor/api.zeek index 22e73cce81..767bd8803f 100644 --- a/scripts/base/frameworks/supervisor/api.zeek +++ b/scripts/base/frameworks/supervisor/api.zeek @@ -1,9 +1,9 @@ ##! The Zeek process supervision API. -# TODO: add proper docs module Supervisor; export { + ## The role a supervised-node will play in Zeek's Cluster Framework. type ClusterRole: enum { NONE, LOGGER, @@ -12,52 +12,178 @@ export { WORKER, }; + ## Describes configuration of a supervised-node within Zeek's Cluster + ## Framework. type ClusterEndpoint: record { + ## The role a supervised-node will play in Zeek's Cluster Framework. 
role: ClusterRole; + ## The host/IP at which the cluster node runs. host: addr; + ## The TCP port at which the cluster node listens for connections. p: port; + ## The interface name from which the node will read/analyze packets. + ## Typically used by worker nodes. interface: string &optional; }; + ## Configuration options that influence behavior of a supervised Zeek node. type NodeConfig: record { + ## The name of the supervised node. These are unique within a given + ## supervised process tree and typically human-readable. name: string; + ## The interface name from which the node will read/analyze packets. interface: string &optional; + ## The working directory that the node should use. directory: string &optional; + ## The filename/path to which the node's stdout will be redirected. stdout_file: string &optional; + ## The filename/path to which the node's stderr will be redirected. stderr_file: string &optional; + ## Additional script filenames/paths that the node should load. scripts: vector of string &default = vector(); + ## A cpu/core number to which the node will try to pin itself. cpu_affinity: int &optional; + ## The Cluster Layout definition. Each node in the Cluster Framework + ## knows about the full, static cluster topology to which it belongs. + ## Entries use node names for keys. The Supervisor framework will + ## automatically translate this table into the right Cluster Framework + ## configuration when spawning supervised-nodes. E.g. it will + ## populate both the CLUSTER_NODE environment variable and + ## :zeek:see:`Cluster::nodes` table. cluster: table[string] of ClusterEndpoint &default=table(); }; + ## The current status of a supervised node. type NodeStatus: record { + ## The desired node configuration. node: NodeConfig; + ## The current or last known process ID of the node. This may not + ## be initialized if the process has not yet started. pid: int &optional; }; + ## The current status of a set of supervised nodes. 
type Status: record { + ## The status of supervised nodes, keyed by node names. nodes: table[string] of NodeStatus; }; + ## Create a new supervised node process. + ## It's an error to call this from a process other than a Supervisor. + ## + ## node: the desired configuration for the new supervised node process. + ## + ## Returns: an empty string on success or description of the error/failure. global create: function(node: NodeConfig): string; + + ## Retrieve current status of a supervised node process. + ## It's an error to call this from a process other than a Supervisor. + ## + ## node: the name of the node to get the status of or an empty string + ## to mean "all nodes". + ## + ## Returns: the current status of a set of nodes. global status: function(node: string &default=""): Status; + + ## Restart a supervised node process by destroying (killing) and + ## re-creating it. + ## It's an error to call this from a process other than a Supervisor. + ## + ## node: the name of the node to restart or an empty string to mean + ## "all nodes". + ## + ## Returns: true on success. global restart: function(node: string &default=""): bool; + + ## Destroy and remove a supervised node process. + ## It's an error to call this from a process other than a Supervisor. + ## + ## node: the name of the node to destroy or an empty string to mean + ## "all nodes". + ## + ## Returns: true on success. global destroy: function(node: string &default=""): bool; + ## Returns: true if this is the Supervisor process. global is_supervisor: function(): bool; + + ## Returns: true if this is a supervised node process. global is_supervised: function(): bool; + ## Returns: the node configuration if this is a supervised node. + ## It's an error to call this function from a process other than + ## a supervised one. + global node: function(): NodeConfig; + + ## Send a request to a remote Supervisor process to create a node.
+ ## + ## reqid: an arbitrary string that will be directly echoed in the response + ## + ## node: the desired configuration for the new supervised node process. global Supervisor::create_request: event(reqid: string, node: NodeConfig); + + ## Handle a response from a Supervisor process that received + ## :zeek:see:`Supervisor::create_request`. + ## + ## reqid: an arbitrary string matching the value in the original request. + ## + ## result: the return value of the remote call to + ## :zeek:see:`Supervisor::create`. global Supervisor::create_response: event(reqid: string, result: string); + ## Send a request to a remote Supervisor process to retrieve node status. + ## + ## reqid: an arbitrary string that will be directly echoed in the response + ## + ## node: the name of the node to get status of or empty string to mean "all + ## nodes". global Supervisor::status_request: event(reqid: string, node: string); + + ## Handle a response from a Supervisor process that received + ## :zeek:see:`Supervisor::status_request`. + ## + ## reqid: an arbitrary string matching the value in the original request. + ## + ## result: the return value of the remote call to + ## :zeek:see:`Supervisor::status`. global Supervisor::status_response: event(reqid: string, result: Status); + ## Send a request to a remote Supervisor process to restart a node. + ## + ## reqid: an arbitrary string that will be directly echoed in the response + ## + ## node: the name of the node to restart or empty string to mean "all + ## nodes". global Supervisor::restart_request: event(reqid: string, node: string); + + ## Handle a response from a Supervisor process that received + ## :zeek:see:`Supervisor::restart_request`. + ## + ## reqid: an arbitrary string matching the value in the original request. + ## + ## result: the return value of the remote call to + ## :zeek:see:`Supervisor::restart`. 
global Supervisor::restart_response: event(reqid: string, result: bool); + ## Send a request to a remote Supervisor process to destroy a node. + ## + ## reqid: an arbitrary string that will be directly echoed in the response + ## + ## node: the name of the node to destroy or empty string to mean "all + ## nodes". global Supervisor::destroy_request: event(reqid: string, node: string); + + ## Handle a response from a Supervisor process that received + ## :zeek:see:`Supervisor::destroy_request`. + ## + ## reqid: an arbitrary string matching the value in the original request. + ## + ## result: the return value of the remote call to + ## :zeek:see:`Supervisor::destroy`. global Supervisor::destroy_response: event(reqid: string, result: bool); + ## Send a request to a remote Supervisor to stop and shut down its + ## process tree. There is no response to this message as the Supervisor + ## simply terminates on receipt. global Supervisor::stop_request: event(); } diff --git a/scripts/base/frameworks/supervisor/main.zeek b/scripts/base/frameworks/supervisor/main.zeek index 3e42331b71..5a7a163df0 100644 --- a/scripts/base/frameworks/supervisor/main.zeek +++ b/scripts/base/frameworks/supervisor/main.zeek @@ -1,6 +1,5 @@ ##! Implements Zeek process supervision configuration options and default ##! behavior. -# TODO: add proper docs @load ./api @load base/frameworks/broker @@ -8,6 +7,10 @@ module Supervisor; export { + ## The Broker topic prefix to use when subscribing to Supervisor API + ## requests and when publishing Supervisor API responses. If you are + ## publishing Supervisor requests, this is also the prefix string to use + ## for their topic names.
const topic_prefix = "zeek/supervisor" &redef; } @@ -78,3 +81,8 @@ function is_supervised(): bool { return Supervisor::__is_supervised(); } + +function node(): NodeConfig + { + return Supervisor::__node(); + } diff --git a/src/Supervisor.cc b/src/Supervisor.cc index 7ae6a81e4e..ca3034f478 100644 --- a/src/Supervisor.cc +++ b/src/Supervisor.cc @@ -38,7 +38,7 @@ struct Stem { ~Stem(); - std::optional Run(); + Supervisor::SupervisedNode Run(); std::optional Poll(); @@ -707,16 +707,18 @@ void Stem::LogError(const char* format, ...) const va_end(args); } -std::optional Stem::Run() +Supervisor::SupervisedNode Stem::Run() { for ( ; ; ) { auto new_node = Poll(); if ( new_node ) - return new_node; + return *new_node; } + // Shouldn't be reached. + assert(false); return {}; } @@ -854,7 +856,7 @@ std::optional Stem::Poll() return {}; } -std::optional Supervisor::RunStem(std::unique_ptr pipe, pid_t parent_pid) +Supervisor::SupervisedNode Supervisor::RunStem(std::unique_ptr pipe, pid_t parent_pid) { Stem s(std::move(pipe), parent_pid); return s.Run(); diff --git a/src/Supervisor.h b/src/Supervisor.h index bb27ab6a4c..f0ffe345dc 100644 --- a/src/Supervisor.h +++ b/src/Supervisor.h @@ -20,98 +20,296 @@ namespace zeek { -class ParentProcessCheckTimer : public Timer { -public: - - ParentProcessCheckTimer(double t, double arg_interval); - - void Dispatch(double t, int is_expire) override; - -protected: - - double interval; -}; - +/** + * A Supervisor object manages a tree of persistent Zeek processes. If any + * child process dies it will be re-created with its original configuration. + * The Supervisor process itself actually only manages a single child process, + * called the Stem process. That Stem is created via a fork() just after the + * command-line arguments have been parsed. 
The Stem process is used as the + * baseline image for spawning and supervising further Zeek child nodes since + * it has the purest global state without having to risk an exec() using an + * on-disk binary that's changed in the meantime from the original Supervisor's + * version of the Zeek binary. However, if the Stem process itself dies + * prematurely, the Supervisor will have to fork() and exec() to revive it (and + * then the revived Stem will re-spawn its own children). Any node in the tree + * will self-terminate if it detects its parent has died and that detection is + * done via polling for change in parent process ID. + */ class Supervisor : public iosource::IOSource { public: + /** + * Configuration options that change Supervisor behavior. + */ struct Config { + /** + * The filesystem path of the Zeek binary/executable. This is used + * if the Stem process ever dies and we need to fork() and exec() to + * re-create it. + */ std::string zeek_exe_path; }; + /** + * Configuration options that influence how a Supervised Zeek node + * integrates into the normal Zeek Cluster Framework. + */ struct ClusterEndpoint { + /** + * The node's role within the cluster. E.g. manager, logger, worker. + */ BifEnum::Supervisor::ClusterRole role; + /** + * The host/IP at which the cluster node is listening for connections. + */ std::string host; + /** + * The TCP port number at which the cluster node listens for connections. + */ int port; + /** + * The interface name from which the node will read/analyze packets. + * Typically used by worker nodes. + */ std::optional interface; }; + /** + * Configuration options that influence behavior of a Supervised Zeek node. + */ struct NodeConfig { + /** + * Create configuration from script-layer record value. + * @param node_val the script-layer record value to convert. + */ static NodeConfig FromRecord(const RecordVal* node_val); + + /** + * Create configuration from JSON representation. + * @param json the JSON string to convert.
+ */ static NodeConfig FromJSON(std::string_view json); + /** + * Convert this object into JSON representation. + * @return the JSON string representing the node config. + */ std::string ToJSON() const; + + /** + * Convert this object into script-layer record value. + * @return the script-layer record value representing the node config. + */ IntrusivePtr ToRecord() const; + /** + * The name of the supervised Zeek node. These are unique within + * a given supervised process tree and typically human-readable. + */ std::string name; + /** + * The interface name from which the node should read/analyze packets. + */ std::optional interface; + /** + * The working directory that should be used by the node. + */ std::optional directory; + /** + * The filename/path to which the node's stdout will be redirected. + */ std::optional stdout_file; + /** + * The filename/path to which the node's stderr will be redirected. + */ std::optional stderr_file; + /** + * A cpu/core number to which the node will try to pin itself. + */ std::optional cpu_affinity; + /** + * Additional script filenames/paths that the node should load. + */ std::vector scripts; + /** + * The Cluster Layout definition. Each node in the Cluster Framework + * knows about the full, static cluster topology to which it belongs. + * Entries in the map use node names for keys. + */ std::map cluster; }; + /** + * State which defines a Supervised node's understanding of itself. + */ struct SupervisedNode { + /** + * Initialize the Supervised node within the Zeek Cluster Framework. + * This function populates the "Cluster::nodes" script-layer variable + * that otherwise is expected to be populated by a + * "cluster-layout.zeek" script in other context (e.g. ZeekCtl + * generates that cluster layout). + */ static bool InitCluster(); + /** + * The node's configuration options. + */ NodeConfig config; + /** + * The process ID of the supervised node's parent process (i.e. the PID + * of the Stem process).
+ */ pid_t parent_pid; }; + /** + * The state of a supervised node from the Supervisor's perspective. + */ struct Node { + /** + * Convert the node into script-layer Supervisor::NodeStatus record + * representation. + */ IntrusivePtr ToRecord() const; + /** + * @return the name of the node. + */ const std::string& Name() const { return config.name; } + /** + * Create a new node state from a given configuration. + * @param arg_config the configuration to use for the node. + */ Node(NodeConfig arg_config) : config(std::move(arg_config)) { } + /** + * The desired configuration for the node. + */ NodeConfig config; + /** + * Process ID of the node (positive/non-zero are valid/live PIDs). + */ pid_t pid = 0; + /** + * Whether the node is voluntarily marked for termination by the + * Supervisor. + */ bool killed = false; + /** + * The last exit status of the node. + */ int exit_status = 0; + /** + * The last signal which terminated the node. + */ int signal_number = 0; + /** + * Number of process revival attempts made after the node first died + * prematurely. + */ int revival_attempts = 0; + /** + * How many seconds to wait until the next revival attempt for the node. + */ int revival_delay = 1; + /** + * The time at which the node's process was last spawned. + */ std::chrono::time_point spawn_time; }; - static std::optional RunStem(std::unique_ptr pipe, - pid_t parent_pid); + /** + * Run the Stem process. The Stem process will receive instructions from + * the Supervisor to manipulate the process hierarchy and it's in charge + * of directly monitoring for whether any nodes die prematurely and need + * to be revived. + * @param pipe bidirectional pipes that allow the Supervisor and Stem + * process to communicate. + * @param parent_pid the Stem's parent process ID (i.e. the PID of the Supervisor) + * @return state which describes what a supervised node should know about + * itself. I.e. this function only returns from a fork()'d child process.
+ */ + static SupervisedNode RunStem(std::unique_ptr pipe, + pid_t parent_pid); using NodeMap = std::map>; + /** + * Create a new Supervisor object. + * @param stem_pipe bidirectional pipe that allow the Supervisor and Stem + * process to communicate. + * @param stem_pid the Stem's process ID. + */ Supervisor(Config cfg, std::unique_ptr stem_pipe, pid_t stem_pid); + /** + * Destruction also cleanly shuts down the entire supervised process tree. + */ ~Supervisor(); + /** + * @return the process ID of the Stem. + */ pid_t StemPID() const { return stem_pid; } - void ObserveChildSignal(int signo); - - RecordVal* Status(std::string_view node_name); - std::string Create(const RecordVal* node); - std::string Create(const Supervisor::NodeConfig& node); - bool Destroy(std::string_view node_name); - bool Restart(std::string_view node_name); - + /** + * @return the state of currently supervised processes. The map uses + * node names for keys. + */ const NodeMap& Nodes() { return nodes; } + /** + * Retrieve current status of a supervised node. + * @param node_name the name of the node for which to retrieve status + * or an empty string to mean "all nodes". + * @return script-layer Supervisor::Status record value describing the + * status of a node or set of nodes. + */ + RecordVal* Status(std::string_view node_name); + + /** + * Create a new supervised node. + * @param node the script-layer Supervisor::NodeConfig value that + * describes the desired node configuration + * @return an empty string on success or description of the error/failure + */ + std::string Create(const RecordVal* node); + + /** + * Create a new supervised node. + * @param node the desired node configuration + * @return an empty string on success or description of the error/failure + */ + std::string Create(const Supervisor::NodeConfig& node); + + /** + * Destroys and removes a supervised node. 
+ * @param node_name the name of the node to destroy or an empty string + * to mean "all nodes" + * @return true on success + */ + bool Destroy(std::string_view node_name); + + /** + * Restart a supervised node process (by destroying and re-creating). + * @param node_name the name of the node to restart or an empty string + * to mean "all nodes" + * @return true on success + */ + bool Restart(std::string_view node_name); + + /** + * Not meant for public use. For use in a signal handler to tell the + * Supervisor a child process (i.e. the Stem) potentially died. + */ + void ObserveChildSignal(int signo); + private: // IOSource interface overrides: @@ -140,6 +338,28 @@ private: std::string msg_buffer; }; +/** + * A timer used by supervised processes to periodically check whether their + * parent (supervisor) process has died. If it has died, the supervised + * process self-terminates. + */ +class ParentProcessCheckTimer : public Timer { +public: + + /** + * Create a timer to check for parent process death. + * @param t the time at which to trigger the timer's check. + * @param interval number of seconds to wait before checking again. + */ + ParentProcessCheckTimer(double t, double interval); + +protected: + + void Dispatch(double t, int is_expire) override; + + double interval; +}; + extern Supervisor* supervisor; extern std::optional supervised_node; diff --git a/src/supervisor.bif b/src/supervisor.bif index dc1c14e871..181f55cbd3 100644 --- a/src/supervisor.bif +++ b/src/supervisor.bif @@ -79,6 +79,21 @@ function Supervisor::__is_supervised%(%): bool return val_mgr->GetBool(zeek::supervised_node.has_value()); %} +function Supervisor::__node%(%): Supervisor::NodeConfig + %{ + if ( !
zeek::supervised_node ) + { + builtin_error("not a supervised process"); + auto rt = BifType::Record::Supervisor::NodeConfig; + auto rval = make_intrusive(rt); + rval->Assign(rt->FieldOffset("name"), new StringVal("")); + return rval.detach(); + } + + auto rval = zeek::supervised_node->config.ToRecord(); + return rval.detach(); + %} + function Supervisor::__is_supervisor%(%): bool %{ return val_mgr->GetBool(zeek::supervisor != nullptr); diff --git a/testing/btest/Baseline/supervisor.config-cluster/zeek.logger-1.stdout b/testing/btest/Baseline/supervisor.config-cluster/zeek.logger-1.stdout index 722fe53d14..6e844e4641 100644 --- a/testing/btest/Baseline/supervisor.config-cluster/zeek.logger-1.stdout +++ b/testing/btest/Baseline/supervisor.config-cluster/zeek.logger-1.stdout @@ -1,2 +1,2 @@ supervised node zeek_init(), logger-1, Cluster::LOGGER -supervised node zeek_done(), logger-1 +supervised node zeek_done(), logger-1, logger-1 diff --git a/testing/btest/Baseline/supervisor.config-cluster/zeek.manager.stdout b/testing/btest/Baseline/supervisor.config-cluster/zeek.manager.stdout index e059737e0d..8c9567b50d 100644 --- a/testing/btest/Baseline/supervisor.config-cluster/zeek.manager.stdout +++ b/testing/btest/Baseline/supervisor.config-cluster/zeek.manager.stdout @@ -1,2 +1,2 @@ supervised node zeek_init(), manager, Cluster::MANAGER -supervised node zeek_done(), manager +supervised node zeek_done(), manager, manager diff --git a/testing/btest/Baseline/supervisor.config-cluster/zeek.proxy-1.stdout b/testing/btest/Baseline/supervisor.config-cluster/zeek.proxy-1.stdout index 5c5b4e3ca5..a11da802ad 100644 --- a/testing/btest/Baseline/supervisor.config-cluster/zeek.proxy-1.stdout +++ b/testing/btest/Baseline/supervisor.config-cluster/zeek.proxy-1.stdout @@ -1,2 +1,2 @@ supervised node zeek_init(), proxy-1, Cluster::PROXY -supervised node zeek_done(), proxy-1 +supervised node zeek_done(), proxy-1, proxy-1 diff --git 
a/testing/btest/Baseline/supervisor.config-cluster/zeek.worker-1.stdout b/testing/btest/Baseline/supervisor.config-cluster/zeek.worker-1.stdout index 0a1c84a25c..a2f6e71d47 100644 --- a/testing/btest/Baseline/supervisor.config-cluster/zeek.worker-1.stdout +++ b/testing/btest/Baseline/supervisor.config-cluster/zeek.worker-1.stdout @@ -1,2 +1,2 @@ supervised node zeek_init(), worker-1, Cluster::WORKER -supervised node zeek_done(), worker-1 +supervised node zeek_done(), worker-1, worker-1 diff --git a/testing/btest/supervisor/config-cluster.zeek b/testing/btest/supervisor/config-cluster.zeek index cea02e2020..6f8d290e0d 100644 --- a/testing/btest/supervisor/config-cluster.zeek +++ b/testing/btest/supervisor/config-cluster.zeek @@ -83,7 +83,7 @@ event Broker::peer_lost(endpoint: Broker::EndpointInfo, msg: string) event zeek_done() { if ( Supervisor::is_supervised() ) - print "supervised node zeek_done()", Cluster::node; + print "supervised node zeek_done()", Cluster::node, Supervisor::node()$name; else print supervisor_output_file, "supervisor zeek_done()"; } From 172456fac05072c66826d796c70dd59449e42857 Mon Sep 17 00:00:00 2001 From: Jon Siwek Date: Tue, 21 Jan 2020 13:19:05 -0800 Subject: [PATCH 37/76] Convert supervisor internals to rapidjson --- src/Supervisor.cc | 58 +++++++++++++++++++++++------------------------ 1 file changed, 29 insertions(+), 29 deletions(-) diff --git a/src/Supervisor.cc b/src/Supervisor.cc index ca3034f478..f8a8ff1636 100644 --- a/src/Supervisor.cc +++ b/src/Supervisor.cc @@ -18,7 +18,8 @@ #include "zeek-config.h" #include "util.h" -#include "3rdparty/json.hpp" +#define RAPIDJSON_HAS_STDSTRING 1 +#include "3rdparty/rapidjson/include/rapidjson/document.h" extern "C" { #include "setsignal.h" @@ -862,7 +863,7 @@ Supervisor::SupervisedNode Supervisor::RunStem(std::unique_ptr pi return s.Run(); } -static BifEnum::Supervisor::ClusterRole role_str_to_enum(const std::string& r) +static BifEnum::Supervisor::ClusterRole 
role_str_to_enum(std::string_view r) { if ( r == "Supervisor::LOGGER" ) return BifEnum::Supervisor::LOGGER; @@ -945,48 +946,47 @@ Supervisor::NodeConfig Supervisor::NodeConfig::FromRecord(const RecordVal* node) Supervisor::NodeConfig Supervisor::NodeConfig::FromJSON(std::string_view json) { Supervisor::NodeConfig rval; - auto j = nlohmann::json::parse(json); - rval.name = j["name"]; + rapidjson::Document j; + j.Parse(json.data(), json.size()); + rval.name = j["name"].GetString(); - if ( auto it = j.find("interface"); it != j.end() ) - rval.interface = *it; + if ( auto it = j.FindMember("interface"); it != j.MemberEnd() ) + rval.interface = it->value.GetString(); - if ( auto it = j.find("directory"); it != j.end() ) - rval.directory = *it; + if ( auto it = j.FindMember("directory"); it != j.MemberEnd() ) + rval.directory = it->value.GetString(); - if ( auto it = j.find("stdout_file"); it != j.end() ) - rval.stdout_file= *it; + if ( auto it = j.FindMember("stdout_file"); it != j.MemberEnd() ) + rval.stdout_file= it->value.GetString(); - if ( auto it = j.find("stderr_file"); it != j.end() ) - rval.stderr_file= *it; + if ( auto it = j.FindMember("stderr_file"); it != j.MemberEnd() ) + rval.stderr_file= it->value.GetString(); - if ( auto it = j.find("cpu_affinity"); it != j.end() ) - rval.cpu_affinity = *it; + if ( auto it = j.FindMember("cpu_affinity"); it != j.MemberEnd() ) + rval.cpu_affinity = it->value.GetInt(); - auto scripts = j["scripts"]; + auto& scripts = j["scripts"]; - for ( auto& s : scripts ) - rval.scripts.emplace_back(std::move(s)); + for ( auto it = scripts.Begin(); it != scripts.End(); ++it ) + rval.scripts.emplace_back(it->GetString()); - auto cluster = j["cluster"]; + auto& cluster = j["cluster"]; - for ( const auto& e : cluster.items() ) + for ( auto it = cluster.MemberBegin(); it != cluster.MemberEnd(); ++it ) { Supervisor::ClusterEndpoint ep; - auto& key = e.key(); - auto& val = e.value(); + auto key = it->name.GetString(); + auto& val = 
it->value; - auto role_str = val["role"]; - ep.role = role_str_to_enum(role_str); + auto& role_str = val["role"]; + ep.role = role_str_to_enum(role_str.GetString()); - ep.host = val["host"]; - ep.port = val["p"]["port"]; + ep.host = val["host"].GetString(); + ep.port = val["p"]["port"].GetInt(); - auto it = val.find("interface"); - - if ( it != val.end() ) - ep.interface = *it; + if ( auto it = val.FindMember("interface"); it != val.MemberEnd() ) + ep.interface = it->value.GetString(); rval.cluster.emplace(key, std::move(ep)); } From 718879735e5a5d80cd90f8df83e72d5c7c722f2b Mon Sep 17 00:00:00 2001 From: Jon Siwek Date: Tue, 21 Jan 2020 18:55:59 -0800 Subject: [PATCH 38/76] Address supervisor code re-factoring feedback from Robin --- src/CMakeLists.txt | 1 + src/Flare.h | 4 +- src/Options.cc | 52 +++++++++ src/Options.h | 77 +++++++++++++ src/Pipe.h | 40 +++++++ src/Supervisor.cc | 168 ++++++++++++++++++++++++---- src/Supervisor.h | 84 +++++++++++--- src/main.cc | 256 +++---------------------------------------- src/supervisor.bif | 28 ++--- src/zeek-affinity.cc | 6 + src/zeek-affinity.h | 2 + 11 files changed, 425 insertions(+), 293 deletions(-) create mode 100644 src/Options.cc create mode 100644 src/Options.h diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 114713151b..546680a8d7 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -247,6 +247,7 @@ set(MAIN_SRCS NetVar.cc Obj.cc OpaqueVal.cc + Options.cc PacketFilter.cc Pipe.cc PolicyFile.cc diff --git a/src/Flare.h b/src/Flare.h index 4c781387bf..ea484b2858 100644 --- a/src/Flare.h +++ b/src/Flare.h @@ -27,14 +27,14 @@ public: /** * Put the object in the "ready" state. * @param signal_safe whether to skip error-reporting functionality that - * is not async-signal-safe + * is not async-signal-safe (errors still abort the process regardless) */ void Fire(bool signal_safe = false); /** * Take the object out of the "ready" state. 
* @param signal_safe whether to skip error-reporting functionality that - * is not async-signal-safe + * is not async-signal-safe (errors still abort the process regardless) * @return number of bytes read from the pipe, corresponds to the number * of times Fire() was called. */ diff --git a/src/Options.cc b/src/Options.cc new file mode 100644 index 0000000000..5c43b13477 --- /dev/null +++ b/src/Options.cc @@ -0,0 +1,52 @@ +// See the file "COPYING" in the main distribution directory for copyright. + +#include "Options.h" + +void zeek::Options::filter_supervisor_options() + { + pcap_filter = {}; + interfaces = {}; + pcap_files = {}; + signature_files = {}; + pcap_output_file = {}; + } + +void zeek::Options::filter_supervised_node_options() + { + auto og = *this; + *this = {}; + + debug_log_streams = og.debug_log_streams; + debug_script_tracing_file = og.debug_script_tracing_file; + script_code_to_exec = og.script_code_to_exec; + script_prefixes = og.script_prefixes; + + signature_re_level = og.signature_re_level; + ignore_checksums = og.ignore_checksums; + use_watchdog = og.use_watchdog; + pseudo_realtime = og.pseudo_realtime; + dns_mode = og.dns_mode; + + bare_mode = og.bare_mode; + perftools_check_leaks = og.perftools_check_leaks; + perftools_profile = og.perftools_profile; + + pcap_filter = og.pcap_filter; + signature_files = og.signature_files; + + // TODO: These are likely to be handled in a node-specific or + // use-case-specific way. e.g. interfaces is already handled for the + // "cluster" use-case, but don't have supervised-pcap-reading + // functionality yet. 
+ /* interfaces = og.interfaces; */ + /* pcap_files = og.pcap_files; */ + + pcap_output_file = og.pcap_output_file; + random_seed_input_file = og.random_seed_input_file; + random_seed_output_file = og.random_seed_output_file; + process_status_file = og.process_status_file; + + plugins_to_load = og.plugins_to_load; + scripts_to_load = og.scripts_to_load; + script_options_to_set = og.script_options_to_set; + } diff --git a/src/Options.h b/src/Options.h new file mode 100644 index 0000000000..72d0389c74 --- /dev/null +++ b/src/Options.h @@ -0,0 +1,77 @@ +// See the file "COPYING" in the main distribution directory for copyright. + +#pragma once + +#include +#include +#include + +#include "DNS_Mgr.h" + +namespace zeek { + +/** + * Options that define general Zeek processing behavior, usually determined + * from command-line arguments. + */ +struct Options { + /** + * Unset options that aren't meant to be used by the supervisor, but may + * make sense for supervised nodes to inherit (as opposed to flagging + * as an error and exiting outright if used in supervisor-mode). + */ + void filter_supervisor_options(); + + /** + * Inherit certain options set in the original supervisor parent process + * and discard the rest.
+ */ + void filter_supervised_node_options(); + + bool print_version = false; + bool print_usage = false; + bool print_execution_time = false; + bool print_signature_debug_info = false; + int print_plugins = 0; + + std::optional debug_log_streams; + std::optional debug_script_tracing_file; + + std::optional identifier_to_print; + std::optional script_code_to_exec; + std::vector script_prefixes = { "" }; // "" = "no prefix" + + int signature_re_level = 4; + bool ignore_checksums = false; + bool use_watchdog = false; + double pseudo_realtime = 0; + DNS_MgrMode dns_mode = DNS_DEFAULT; + + bool supervisor_mode = false; + bool parse_only = false; + bool bare_mode = false; + bool debug_scripts = false; + bool perftools_check_leaks = false; + bool perftools_profile = false; + + bool run_unit_tests = false; + std::vector doctest_args; + + std::optional pcap_filter; + std::vector interfaces; + std::vector pcap_files; + std::vector signature_files; + + std::optional pcap_output_file; + std::optional random_seed_input_file; + std::optional random_seed_output_file; + std::optional process_status_file; + std::optional zeekygen_config_file; + std::string libidmef_dtd_file = "idmef-message.dtd"; + + std::set plugins_to_load; + std::vector scripts_to_load; + std::vector script_options_to_set; +}; + +} // namespace zeek diff --git a/src/Pipe.h b/src/Pipe.h index 18786c75ad..e310c83edc 100644 --- a/src/Pipe.h +++ b/src/Pipe.h @@ -66,32 +66,72 @@ private: int status_flags[2]; }; +/** + * A pair of pipes that can be used for bi-directional IPC. + */ class PipePair { public: + /** + * Create a pair of pipes + * @param flags file descriptor flags to set on pipes + * @param status_flags descriptor status flags to set on pipes + * @param fds may be supplied to open existing file descriptors rather + * than create ones from a new pair of pipes.
Should point to memory + * containing four consecutive file descriptors, "read" end and "write" end + * of the first pipe followed by the "read" end and "write" end of the + * second pipe. + */ PipePair(int flags, int status_flags, int* fds = nullptr); + /** + * @return the pipe used for receiving input + */ Pipe& In() { return pipes[swapped]; } + /** + * @return the pipe used for sending output + */ Pipe& Out() { return pipes[!swapped]; } + /** + * @return the pipe used for receiving input + */ const Pipe& In() const { return pipes[swapped]; } + /** + * @return the pipe used for sending output + */ const Pipe& Out() const { return pipes[!swapped]; } + /** + * @return a file descriptor that may be used for receiving messages by + * polling/reading it. + */ int InFD() const { return In().ReadFD(); } + /** + * @return a file descriptor that may be used for sending messages by + * writing to it. + */ int OutFD() const { return Out().WriteFD(); } + /** + * Swaps the meaning of the pipes in the pair. E.g. call this after + * fork()'ing so that the child process uses the right pipe for + * reading/writing. + */ void Swap() { swapped = ! swapped; } +private: + Pipe pipes[2]; bool swapped = false; }; diff --git a/src/Supervisor.cc b/src/Supervisor.cc index f8a8ff1636..22de934495 100644 --- a/src/Supervisor.cc +++ b/src/Supervisor.cc @@ -1,3 +1,4 @@ +// See the file "COPYING" in the main distribution directory for copyright.
#include #include @@ -17,6 +18,7 @@ #include "NetVar.h" #include "zeek-config.h" #include "util.h" +#include "zeek-affinity.h" #define RAPIDJSON_HAS_STDSTRING 1 #include "3rdparty/rapidjson/include/rapidjson/document.h" @@ -33,9 +35,11 @@ extern "C" { using namespace zeek; +std::optional Supervisor::supervised_node; + namespace { struct Stem { - Stem(std::unique_ptr p, pid_t parent_pid); + Stem(Supervisor::StemState stem_state); ~Stem(); @@ -98,7 +102,7 @@ static RETSIGTYPE stem_signal_handler(int signo) static RETSIGTYPE supervisor_signal_handler(int signo) { - supervisor->ObserveChildSignal(signo); + supervisor_mgr->ObserveChildSignal(signo); return RETSIGVAL; } @@ -143,7 +147,7 @@ void ParentProcessCheckTimer::Dispatch(double t, int is_expire) // Linux: prctl(PR_SET_PDEATHSIG, ...) // FreeBSD: procctl(PROC_PDEATHSIG_CTL) // Also note the Stem process has its own polling loop with similar logic. - if ( zeek::supervised_node->parent_pid != getppid() ) + if ( zeek::Supervisor::ThisNode()->parent_pid != getppid() ) zeek_terminate_loop("supervised node was orphaned"); if ( ! 
is_expire ) @@ -151,10 +155,8 @@ void ParentProcessCheckTimer::Dispatch(double t, int is_expire) interval)); } -Supervisor::Supervisor(Supervisor::Config cfg, - std::unique_ptr pipe, - pid_t arg_stem_pid) - : config(std::move(cfg)), stem_pid(arg_stem_pid), stem_pipe(std::move(pipe)) +Supervisor::Supervisor(Supervisor::Config cfg, StemState ss) + : config(std::move(cfg)), stem_pid(ss.pid), stem_pipe(std::move(ss.pipe)) { DBG_LOG(DBG_SUPERVISOR, "forked stem process %d", stem_pid); setsignal(SIGCHLD, supervisor_signal_handler); @@ -179,8 +181,7 @@ Supervisor::Supervisor(Supervisor::Config cfg, fprintf(stderr, "Supervisor stem died early by signal %d\n", WTERMSIG(status)); else - fprintf(stderr, "Supervisor stem died early for unknown reason\n", - WTERMSIG(status)); + fprintf(stderr, "Supervisor stem died early for unknown reason\n"); } exit(1); @@ -411,8 +412,8 @@ size_t Supervisor::ProcessMessages() return msgs.size(); } -Stem::Stem(std::unique_ptr p, pid_t ppid) - : parent_pid(ppid), signal_flare(new bro::Flare()), pipe(std::move(p)) +Stem::Stem(Supervisor::StemState ss) + : parent_pid(ss.parent_pid), signal_flare(new bro::Flare()), pipe(std::move(ss.pipe)) { zeek::set_thread_name("zeek.stem"); pipe->Swap(); @@ -857,10 +858,66 @@ std::optional Stem::Poll() return {}; } -Supervisor::SupervisedNode Supervisor::RunStem(std::unique_ptr pipe, pid_t parent_pid) +std::optional Supervisor::CreateStem(bool supervisor_mode) { - Stem s(std::move(pipe), parent_pid); - return s.Run(); + // If the Stem needs to be re-created via fork()/exec(), then the necessary + // state information is communicated via ZEEK_STEM env. var. 
+ auto zeek_stem_env = getenv("ZEEK_STEM"); + + if ( zeek_stem_env ) + { + std::vector zeek_stem_nums; + tokenize_string(zeek_stem_env, ",", &zeek_stem_nums); + + if ( zeek_stem_nums.size() != 5 ) + { + fprintf(stderr, "invalid ZEEK_STEM environment variable value: '%s'\n", + zeek_stem_env); + exit(1); + } + + pid_t stem_ppid = std::stoi(zeek_stem_nums[0]); + int fds[4]; + + for ( auto i = 0; i < 4; ++i ) + fds[i] = std::stoi(zeek_stem_nums[i + 1]); + + StemState ss; + ss.pipe = std::make_unique(FD_CLOEXEC, O_NONBLOCK, fds); + ss.parent_pid = stem_ppid; + zeek::Supervisor::RunStem(std::move(ss)); + return {}; + } + + if ( ! supervisor_mode ) + return {}; + + StemState ss; + ss.pipe = std::make_unique(FD_CLOEXEC, O_NONBLOCK); + ss.parent_pid = getpid(); + ss.pid = fork(); + + if ( ss.pid == -1 ) + { + fprintf(stderr, "failed to fork Zeek supervisor stem process: %s\n", + strerror(errno)); + exit(1); + } + + if ( ss.pid == 0 ) + { + zeek::Supervisor::RunStem(std::move(ss)); + return {}; + } + + return std::optional(std::move(ss)); + } + +Supervisor::SupervisedNode Supervisor::RunStem(StemState stem_state) + { + Stem s(std::move(stem_state)); + supervised_node = s.Run(); + return *supervised_node; } static BifEnum::Supervisor::ClusterRole role_str_to_enum(std::string_view r) @@ -908,7 +965,7 @@ Supervisor::NodeConfig Supervisor::NodeConfig::FromRecord(const RecordVal* node) auto scripts_val = node->Lookup("scripts")->AsVectorVal(); - for ( auto i = 0; i < scripts_val->Size(); ++i ) + for ( auto i = 0u; i < scripts_val->Size(); ++i ) { auto script = scripts_val->Lookup(i)->AsStringVal()->ToStdString(); rval.scripts.emplace_back(std::move(script)); @@ -1088,9 +1145,9 @@ static Val* supervisor_role_to_cluster_node_type(BifEnum::Supervisor::ClusterRol } } -bool Supervisor::SupervisedNode::InitCluster() +bool Supervisor::SupervisedNode::InitCluster() const { - if ( supervised_node->config.cluster.empty() ) + if ( config.cluster.empty() ) return false; auto 
cluster_node_type = global_scope()->Lookup("Cluster::Node")->AsType()->AsRecordType(); @@ -1100,7 +1157,7 @@ bool Supervisor::SupervisedNode::InitCluster() auto has_logger = false; std::optional manager_name; - for ( const auto& e : supervised_node->config.cluster ) + for ( const auto& e : config.cluster ) { if ( e.second.role == BifEnum::Supervisor::MANAGER ) manager_name = e.first; @@ -1108,7 +1165,7 @@ bool Supervisor::SupervisedNode::InitCluster() has_logger = true; } - for ( const auto& e : supervised_node->config.cluster ) + for ( const auto& e : config.cluster ) { const auto& node_name = e.first; const auto& ep = e.second; @@ -1135,6 +1192,79 @@ bool Supervisor::SupervisedNode::InitCluster() return true; } +void Supervisor::SupervisedNode::Init(zeek::Options* options) const + { + const auto& node_name = config.name; + + if ( config.directory ) + { + if ( chdir(config.directory->data()) ) + { + fprintf(stderr, "node '%s' failed to chdir to %s: %s\n", + node_name.data(), config.directory->data(), + strerror(errno)); + exit(1); + } + } + + if ( config.stderr_file ) + { + auto fd = open(config.stderr_file->data(), + O_WRONLY | O_CREAT | O_TRUNC | O_APPEND | O_CLOEXEC, + 0600); + + if ( fd == -1 || dup2(fd, STDERR_FILENO) == -1 ) + { + fprintf(stderr, "node '%s' failed to create stderr file %s: %s\n", + node_name.data(), config.stderr_file->data(), + strerror(errno)); + exit(1); + } + } + + if ( config.stdout_file ) + { + auto fd = open(config.stdout_file->data(), + O_WRONLY | O_CREAT | O_TRUNC | O_APPEND | O_CLOEXEC, + 0600); + + if ( fd == -1 || dup2(fd, STDOUT_FILENO) == -1 ) + { + fprintf(stderr, "node '%s' failed to create stdout file %s: %s\n", + node_name.data(), config.stdout_file->data(), + strerror(errno)); + exit(1); + } + } + + if ( config.cpu_affinity ) + { + auto res = zeek::set_affinity(*config.cpu_affinity); + + if ( ! res ) + fprintf(stderr, "node '%s' failed to set CPU affinity: %s\n", + node_name.data(), strerror(errno)); + } + + if ( ! 
config.cluster.empty() ) + { + if ( setenv("CLUSTER_NODE", node_name.data(), true) == -1 ) + { + fprintf(stderr, "node '%s' failed to setenv: %s\n", + node_name.data(), strerror(errno)); + exit(1); + } + } + + options->filter_supervised_node_options(); + + if ( config.interface ) + options->interfaces.emplace_back(*config.interface); + + for ( const auto& s : config.scripts ) + options->scripts_to_load.emplace_back(s); + } + RecordVal* Supervisor::Status(std::string_view node_name) { auto rval = new RecordVal(BifType::Record::Supervisor::Status); diff --git a/src/Supervisor.h b/src/Supervisor.h index f0ffe345dc..32b07e5aff 100644 --- a/src/Supervisor.h +++ b/src/Supervisor.h @@ -1,3 +1,5 @@ +// See the file "COPYING" in the main distribution directory for copyright. + #pragma once #include @@ -17,6 +19,7 @@ #include "Flare.h" #include "NetVar.h" #include "IntrusivePtr.h" +#include "Options.h" namespace zeek { @@ -149,8 +152,17 @@ public: * that otherwise is expected to be populated by a * "cluster-layout.zeek" script in other context (e.g. ZeekCtl * generates that cluster layout). + * @return true if the supervised node is using the Cluster Framework + * else false. */ - static bool InitCluster(); + bool InitCluster() const; + + /** + * Initialize the Supervised node. + * @param options the Zeek options to extend/modify as appropriate + * for the node's configuration. + */ + void Init(zeek::Options* options) const; /** * The node's configuration options. @@ -223,28 +235,50 @@ public: }; /** - * Run the Stem process. The Stem process will receive instructions from - * the Supervisor to manipulate the process hierarchy and it's in charge - * of directly monitoring for whether any nodes die premature and need - * to be revived. - * @param pipe bidirectional pipes that allow the Supervisor and Stem - * process to communicate. - * @param pid the Stem's parent process ID (i.e. 
the PID of the Supervisor) - * @return state which describes what a supervised node should know about - * itself. I.e. this function only returns from a fork()'d child process. + * State used to initialize the Stem process. */ - static SupervisedNode RunStem(std::unique_ptr pipe, - pid_t parent_pid); + struct StemState { + /** + * Bidirectional pipes that allow the Supervisor and Stem to talk. + */ + std::unique_ptr pipe; + /** + * The Stem's parent process ID (i.e. PID of the Supervisor). + */ + pid_t parent_pid = 0; + /** + * The Stem's process ID. + */ + pid_t pid = 0; + }; + + /** + * Create and run the Stem process if necessary. + * @param supervisor_mode whether Zeek was invoked with the supervisor + * mode specified as command-line argument/option. + * @return state that defines the Stem process if called from the + * Supervisor process. The Stem process itself will not return from this + * function, but a node it spawns via fork() will return from it and + * information about it is available in ThisNode(). + */ + static std::optional CreateStem(bool supervisor_mode); + + /** + * @return the state which describes what a supervised node should know + * about itself if this is a supervised process. If called from a process + * that is not supervised, this returns an "empty" object. + */ + static const std::optional& ThisNode() + { return supervised_node; } using NodeMap = std::map>; /** * Create a new Supervisor object. - * @param stem_pipe bidirectional pipe that allow the Supervisor and Stem - * process to communicate. - * @param stem_pid the Stem's process ID. + * @param stem_state information about the Stem process that was already + * created via CreateStem() */ - Supervisor(Config cfg, std::unique_ptr stem_pipe, pid_t stem_pid); + Supervisor(Config cfg, StemState stem_state); /** * Destruction also cleanly shuts down the entire supervised process tree.
@@ -329,6 +363,21 @@ private: const char* Tag() override { return "zeek::Supervisor"; } + /** + * Run the Stem process. The Stem process will receive instructions from + * the Supervisor to manipulate the process hierarchy and it's in charge + * of directly monitoring for whether any nodes die prematurely and need + * to be revived. + * @param stem_state holds the bidirectional pipes that allow the Supervisor + * and Stem process to communicate, and the Stem's parent process ID (i.e. + * the PID of the Supervisor). + * @return state which describes what a supervised node should know about + * itself. I.e. this function only returns from a fork()'d child process. + */ + static SupervisedNode RunStem(StemState stem_state); + + static std::optional supervised_node; + Config config; pid_t stem_pid; std::unique_ptr stem_pipe; @@ -360,7 +409,6 @@ protected: double interval; }; -extern Supervisor* supervisor; -extern std::optional supervised_node; +extern Supervisor* supervisor_mgr; } // namespace zeek diff --git a/src/main.cc b/src/main.cc index 63ba0c62d3..d26cdaaa48 100644 --- a/src/main.cc +++ b/src/main.cc @@ -23,6 +23,7 @@ extern "C" { #include #include +#include "Options.h" #include "bsd-getopt-long.h" #include "input.h" #include "DNS_Mgr.h" @@ -78,8 +79,6 @@ extern "C" { #include "setsignal.h" }; -#include "zeek-affinity.h" - #ifdef USE_PERFTOOLS_DEBUG HeapLeakChecker* heap_checker = 0; int perftools_leaks = 0; @@ -99,8 +98,7 @@ file_analysis::Manager* file_mgr = 0; zeekygen::Manager* zeekygen_mgr = 0; iosource::Manager* iosource_mgr = 0; bro_broker::Manager* broker_mgr = 0; -zeek::Supervisor* zeek::supervisor = 0; -std::optional zeek::supervised_node; +zeek::Supervisor* zeek::supervisor_mgr = 0; std::vector zeek_script_prefixes; Stmt* stmts; @@ -197,7 +195,7 @@ static void usage(const char* prog, int code = 1) fprintf(stderr, " -M|--mem-profile | record heap [perftools]\n"); #endif fprintf(stderr, " --pseudo-realtime[=] | enable pseudo-realtime for performance evaluation
(default 1)\n"); - fprintf(stderr, " -j|--jobs[=] | enable supervisor mode with N workers (default 1)\n"); + fprintf(stderr, " -j|--jobs | enable supervisor mode\n"); #ifdef USE_IDMEF fprintf(stderr, " -n|--idmef-dtd | specify path to IDMEF DTD file\n"); @@ -221,185 +219,6 @@ static void usage(const char* prog, int code = 1) exit(code); } -struct zeek_options { - bool print_version = false; - bool print_usage = false; - bool print_execution_time = false; - bool print_signature_debug_info = false; - int print_plugins = 0; - - std::optional debug_log_streams; - std::optional debug_script_tracing_file; - - std::optional identifier_to_print; - std::optional script_code_to_exec; - std::vector script_prefixes = { "" }; // "" = "no prefix" - - int signature_re_level = 4; - bool ignore_checksums = false; - bool use_watchdog = false; - double pseudo_realtime = 0; - DNS_MgrMode dns_mode = DNS_DEFAULT; - - bool supervisor_mode = false; - bool parse_only = false; - bool bare_mode = false; - bool debug_scripts = false; - bool perftools_check_leaks = false; - bool perftools_profile = false; - - bool run_unit_tests = false; - std::vector doctest_args; - - std::optional pcap_filter; - std::vector interfaces; - std::vector pcap_files; - std::vector signature_files; - - std::optional pcap_output_file; - std::optional random_seed_input_file; - std::optional random_seed_output_file; - std::optional process_status_file; - std::optional zeekygen_config_file; - std::string libidmef_dtd_file = "idmef-message.dtd"; - - std::set plugins_to_load; - std::vector scripts_to_load; - std::vector script_options_to_set; - - /** - * Unset options that aren't meant to be used by the supervisor, but may - * make sense for supervised nodes to inherit (as opposed to flagging - * as an error an exiting outright if used in supervisor-mode). 
- */ - void filter_supervisor_options() - { - pcap_filter = {}; - interfaces = {}; - pcap_files = {}; - signature_files = {}; - pcap_output_file = {}; - } - - /** - * Inherit certain options set in the original supervisor parent process - * and discard the rest. - */ - void filter_supervised_node_options() - { - auto og = *this; - *this = {}; - - debug_log_streams = og.debug_log_streams; - debug_script_tracing_file = og.debug_script_tracing_file; - script_code_to_exec = og.script_code_to_exec; - script_prefixes = og.script_prefixes; - - signature_re_level = og.signature_re_level; - ignore_checksums = og.ignore_checksums; - use_watchdog = og.use_watchdog; - pseudo_realtime = og.pseudo_realtime; - dns_mode = og.dns_mode; - - bare_mode = og.bare_mode; - perftools_check_leaks = og.perftools_check_leaks; - perftools_profile = og.perftools_profile; - - pcap_filter = og.pcap_filter; - signature_files = og.signature_files; - - // TODO: These are likely to be handled in a node-specific or - // use-case-specific way. e.g. interfaces is already handled for the - // "cluster" use-case, but don't have supervised-pcap-reading - // functionality yet. 
- /* interfaces = og.interfaces; */ - /* pcap_files = og.pcap_files; */ - - pcap_output_file = og.pcap_output_file; - random_seed_input_file = og.random_seed_input_file; - random_seed_output_file = og.random_seed_output_file; - process_status_file = og.process_status_file; - - plugins_to_load = og.plugins_to_load; - scripts_to_load = og.scripts_to_load; - script_options_to_set = og.script_options_to_set; - } -}; - -static void init_supervised_node(zeek_options* options) - { - const auto& config = zeek::supervised_node->config; - const auto& node_name = config.name; - - if ( config.directory ) - { - if ( chdir(config.directory->data()) ) - { - fprintf(stderr, "node '%s' failed to chdir to %s: %s\n", - node_name.data(), config.directory->data(), - strerror(errno)); - exit(1); - } - } - - if ( config.stderr_file ) - { - auto fd = open(config.stderr_file->data(), - O_WRONLY | O_CREAT | O_TRUNC | O_APPEND | O_CLOEXEC, - 0600); - - if ( fd == -1 || dup2(fd, STDERR_FILENO) == -1 ) - { - fprintf(stderr, "node '%s' failed to create stderr file %s: %s\n", - node_name.data(), config.stderr_file->data(), - strerror(errno)); - exit(1); - } - } - - if ( config.stdout_file ) - { - auto fd = open(config.stdout_file->data(), - O_WRONLY | O_CREAT | O_TRUNC | O_APPEND | O_CLOEXEC, - 0600); - - if ( fd == -1 || dup2(fd, STDOUT_FILENO) == -1 ) - { - fprintf(stderr, "node '%s' failed to create stdout file %s: %s\n", - node_name.data(), config.stdout_file->data(), - strerror(errno)); - exit(1); - } - } - - if ( config.cpu_affinity ) - { - auto res = zeek::set_affinity(*config.cpu_affinity); - - if ( ! res ) - fprintf(stderr, "node '%s' failed to set CPU affinity: %s\n", - node_name.data(), strerror(errno)); - } - - options->filter_supervised_node_options(); - - if ( config.interface ) - options->interfaces.emplace_back(*config.interface); - - if ( ! 
config.cluster.empty() ) - { - if ( setenv("CLUSTER_NODE", node_name.data(), true) == -1 ) - { - fprintf(stderr, "node '%s' failed to setenv: %s\n", - node_name.data(), strerror(errno)); - exit(1); - } - } - - for ( const auto& s : config.scripts ) - options->scripts_to_load.emplace_back(s); - } - static std::vector to_cargs(const std::vector& args) { std::vector rval; @@ -411,9 +230,9 @@ static std::vector to_cargs(const std::vector& args) return rval; } -static zeek_options parse_cmdline(int argc, char** argv) +static zeek::Options parse_cmdline(int argc, char** argv) { - zeek_options rval = {}; + zeek::Options rval; // When running unit tests, the first argument on the command line must be // --test, followed by doctest options. Optionally, users can use "--" as @@ -522,7 +341,7 @@ static zeek_options parse_cmdline(int argc, char** argv) // getopt may permute the array, so need yet another array auto zargs = std::make_unique(zeek_args.size()); - for ( auto i = 0; i < zeek_args.size(); ++i ) + for ( auto i = 0u; i < zeek_args.size(); ++i ) zargs[i] = zeek_args[i].data(); while ( (op = getopt_long(zeek_args.size(), zargs.get(), opts, long_opts, &long_optsind)) != EOF ) @@ -671,7 +490,7 @@ static zeek_options parse_cmdline(int argc, char** argv) // Process remaining arguments. X=Y arguments indicate script // variable/parameter assignments. X::Y arguments indicate plugins to // activate/query. The remainder are treated as scripts to load. 
- while ( optind < zeek_args.size() ) + while ( optind < static_cast(zeek_args.size()) ) { if ( strchr(zargs[optind], '=') ) rval.script_options_to_set.emplace_back(zargs[optind++]); @@ -1017,52 +836,10 @@ int main(int argc, char** argv) return context.run(); } - pid_t stem_pid = 0; - std::unique_ptr supervisor_pipe; - auto zeek_stem_env = getenv("ZEEK_STEM"); + auto stem_state = zeek::Supervisor::CreateStem(options.supervisor_mode); - if ( zeek_stem_env ) - { - std::vector zeek_stem_nums; - tokenize_string(zeek_stem_env, ",", &zeek_stem_nums); - - if ( zeek_stem_nums.size() != 5 ) - { - fprintf(stderr, "invalid ZEEK_STEM environment variable value: '%s'\n", - zeek_stem_env); - exit(1); - } - - pid_t stem_ppid = std::stoi(zeek_stem_nums[0]); - int fds[4]; - - for ( auto i = 0; i < 4; ++i ) - fds[i] = std::stoi(zeek_stem_nums[i + 1]); - - supervisor_pipe.reset(new bro::PipePair{FD_CLOEXEC, O_NONBLOCK, fds}); - zeek::supervised_node = zeek::Supervisor::RunStem(std::move(supervisor_pipe), - stem_ppid); - } - else if ( options.supervisor_mode ) - { - supervisor_pipe.reset(new bro::PipePair{FD_CLOEXEC, O_NONBLOCK}); - auto stem_ppid = getpid(); - stem_pid = fork(); - - if ( stem_pid == -1 ) - { - fprintf(stderr, "failed to fork Zeek supervisor stem process: %s\n", - strerror(errno)); - exit(1); - } - - if ( stem_pid == 0 ) - zeek::supervised_node = zeek::Supervisor::RunStem(std::move(supervisor_pipe), - stem_ppid); - } - - if ( zeek::supervised_node ) - init_supervised_node(&options); + if ( zeek::Supervisor::ThisNode() ) + zeek::Supervisor::ThisNode()->Init(&options); double time_start = current_time(true); @@ -1134,9 +911,8 @@ int main(int argc, char** argv) zeek::Supervisor::Config cfg = {}; cfg.zeek_exe_path = zeek_exe_path; options.filter_supervisor_options(); - zeek::supervisor = new zeek::Supervisor(std::move(cfg), - std::move(supervisor_pipe), - stem_pid); + zeek::supervisor_mgr = new zeek::Supervisor(std::move(cfg), + std::move(*stem_state)); } const char* 
seed_load_file = zeekenv("ZEEK_SEED_FILE"); @@ -1191,7 +967,7 @@ int main(int argc, char** argv) options.interfaces.size() == 0 && ! options.identifier_to_print && ! command_line_policy && ! options.print_plugins && - ! options.supervisor_mode && ! zeek::supervised_node ) + ! options.supervisor_mode && ! zeek::Supervisor::ThisNode() ) add_input_file("-"); for ( const auto& script_option : options.script_options_to_set ) @@ -1510,8 +1286,8 @@ int main(int argc, char** argv) iosource_mgr->Register(thread_mgr, true); - if ( zeek::supervisor ) - iosource_mgr->Register(zeek::supervisor); + if ( zeek::supervisor_mgr ) + iosource_mgr->Register(zeek::supervisor_mgr); if ( iosource_mgr->Size() > 0 || have_pending_timers || @@ -1532,7 +1308,7 @@ int main(int argc, char** argv) #endif - if ( zeek::supervised_node ) + if ( zeek::Supervisor::ThisNode() ) timer_mgr->Add(new zeek::ParentProcessCheckTimer(1, 1)); double time_net_start = current_time(true);; diff --git a/src/supervisor.bif b/src/supervisor.bif index 181f55cbd3..17c67d1d33 100644 --- a/src/supervisor.bif +++ b/src/supervisor.bif @@ -21,67 +21,67 @@ type Supervisor::NodeStatus: record; function Supervisor::__status%(node: string%): Supervisor::Status %{ - if ( ! zeek::supervisor ) + if ( ! zeek::supervisor_mgr ) { builtin_error("supervisor mode not enabled"); return new RecordVal(BifType::Record::Supervisor::Status); } - return zeek::supervisor->Status(node->CheckString()); + return zeek::supervisor_mgr->Status(node->CheckString()); %} function Supervisor::__create%(node: Supervisor::NodeConfig%): string %{ - if ( ! zeek::supervisor ) + if ( ! zeek::supervisor_mgr ) { builtin_error("supervisor mode not enabled"); return new StringVal("supervisor mode not enabled"); } - auto rval = zeek::supervisor->Create(node->AsRecordVal()); + auto rval = zeek::supervisor_mgr->Create(node->AsRecordVal()); return new StringVal(rval); %} function Supervisor::__destroy%(node: string%): bool %{ - if ( ! zeek::supervisor ) + if ( ! 
zeek::supervisor_mgr ) { builtin_error("supervisor mode not enabled"); return val_mgr->GetBool(false); } - auto rval = zeek::supervisor->Destroy(node->CheckString()); + auto rval = zeek::supervisor_mgr->Destroy(node->CheckString()); return val_mgr->GetBool(rval); %} function Supervisor::__restart%(node: string%): bool %{ - if ( ! zeek::supervisor ) + if ( ! zeek::supervisor_mgr ) { builtin_error("supervisor mode not enabled"); return val_mgr->GetBool(false); } - auto rval = zeek::supervisor->Restart(node->CheckString()); + auto rval = zeek::supervisor_mgr->Restart(node->CheckString()); return val_mgr->GetBool(rval); %} function Supervisor::__init_cluster%(%): bool %{ - if ( zeek::supervised_node ) - return val_mgr->GetBool(zeek::supervised_node->InitCluster()); + if ( zeek::Supervisor::ThisNode() ) + return val_mgr->GetBool(zeek::Supervisor::ThisNode()->InitCluster()); return val_mgr->GetBool(false); %} function Supervisor::__is_supervised%(%): bool %{ - return val_mgr->GetBool(zeek::supervised_node.has_value()); + return val_mgr->GetBool(zeek::Supervisor::ThisNode().has_value()); %} function Supervisor::__node%(%): Supervisor::NodeConfig %{ - if ( ! zeek::supervised_node ) + if ( ! zeek::Supervisor::ThisNode() ) { builtin_error("not a supervised process"); auto rt = BifType::Record::Supervisor::NodeConfig; @@ -90,11 +90,11 @@ function Supervisor::__node%(%): Supervisor::NodeConfig return rval.detach(); } - auto rval = zeek::supervised_node->config.ToRecord(); + auto rval = zeek::Supervisor::ThisNode()->config.ToRecord(); return rval.detach(); %} function Supervisor::__is_supervisor%(%): bool %{ - return val_mgr->GetBool(zeek::supervisor != nullptr); + return val_mgr->GetBool(zeek::supervisor_mgr != nullptr); %} diff --git a/src/zeek-affinity.cc b/src/zeek-affinity.cc index 7fef4c203a..da23495d4b 100644 --- a/src/zeek-affinity.cc +++ b/src/zeek-affinity.cc @@ -1,5 +1,11 @@ // See the file "COPYING" in the main distribution directory for copyright. 
+// This is all in its own source file primarily because the Linux +// implementation uses the _GNU_SOURCE feature test macro which must be +// defined before including any header file and lumping this together with +// other util functions makes that requirement less apparent and less +// self-contained. + #if defined(__linux__) #if !defined(_GNU_SOURCE) diff --git a/src/zeek-affinity.h b/src/zeek-affinity.h index ce4bdd9c49..f8f173634b 100644 --- a/src/zeek-affinity.h +++ b/src/zeek-affinity.h @@ -1,5 +1,7 @@ // See the file "COPYING" in the main distribution directory for copyright. +#pragma once + namespace zeek { /** From 59e075acabacbf58e07b51322982e6cb855de75e Mon Sep 17 00:00:00 2001 From: Jon Siwek Date: Wed, 22 Jan 2020 11:23:10 -0800 Subject: [PATCH 39/76] Move supervisor source files into supervisor/ --- src/CMakeLists.txt | 9 +++++++-- src/Net.cc | 1 - src/main.cc | 2 +- src/{ => supervisor}/Supervisor.cc | 0 src/{ => supervisor}/Supervisor.h | 0 src/{ => supervisor}/supervisor.bif | 2 +- 6 files changed, 9 insertions(+), 5 deletions(-) rename src/{ => supervisor}/Supervisor.cc (100%) rename src/{ => supervisor}/Supervisor.h (100%) rename src/{ => supervisor}/supervisor.bif (98%) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 546680a8d7..4d23220ab6 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -107,7 +107,11 @@ set(BIF_SRCS strings.bif reporter.bif option.bif - supervisor.bif + # Note: the supervisor BIF file is treated like other top-level BIFs + # instead of contained in its own subdirectory CMake logic because + # subdirectory BIFs are treated differently and don't support being called + # *during* parsing (e.g. within an @if directive). 
+ supervisor/supervisor.bif ) foreach (bift ${BIF_SRCS}) @@ -267,7 +271,6 @@ set(MAIN_SRCS Notifier.cc Stats.cc Stmt.cc - Supervisor.cc Tag.cc Timer.cc Traverse.cc @@ -288,6 +291,8 @@ set(MAIN_SRCS modp_numtoa.c siphash24.c + supervisor/Supervisor.cc + threading/BasicThread.cc threading/Formatter.cc threading/Manager.cc diff --git a/src/Net.cc b/src/Net.cc index 8c7e976bbc..4ef0a0a54a 100644 --- a/src/Net.cc +++ b/src/Net.cc @@ -33,7 +33,6 @@ #include "iosource/PktDumper.h" #include "plugin/Manager.h" #include "broker/Manager.h" -#include "Supervisor.h" extern "C" { #include "setsignal.h" diff --git a/src/main.cc b/src/main.cc index d26cdaaa48..98661df2ea 100644 --- a/src/main.cc +++ b/src/main.cc @@ -46,7 +46,7 @@ extern "C" { #include "Brofiler.h" #include "Traverse.h" -#include "Supervisor.h" +#include "supervisor/Supervisor.h" #include "threading/Manager.h" #include "input/Manager.h" #include "logging/Manager.h" diff --git a/src/Supervisor.cc b/src/supervisor/Supervisor.cc similarity index 100% rename from src/Supervisor.cc rename to src/supervisor/Supervisor.cc diff --git a/src/Supervisor.h b/src/supervisor/Supervisor.h similarity index 100% rename from src/Supervisor.h rename to src/supervisor/Supervisor.h diff --git a/src/supervisor.bif b/src/supervisor/supervisor.bif similarity index 98% rename from src/supervisor.bif rename to src/supervisor/supervisor.bif index 17c67d1d33..b97c562deb 100644 --- a/src/supervisor.bif +++ b/src/supervisor/supervisor.bif @@ -1,7 +1,7 @@ ##! The BIFs that define the Zeek supervisor control interface. 
%%{ -#include "Supervisor.h" +#include "supervisor/Supervisor.h" %%} module Supervisor; From 68b513a3640bdada086ddbe86e227bb74b25e1c8 Mon Sep 17 00:00:00 2001 From: Jon Siwek Date: Wed, 22 Jan 2020 13:17:38 -0800 Subject: [PATCH 40/76] Fix supervisor "destroy" call on nodes not currently alive This would mistakenly have the Stem process kill itself due to giving PID 0 as argument to kill() where it really was being used to mean "that node does not currently have any live process associated with it" and so can just be removed without trying to kill/reap. --- src/supervisor/Supervisor.cc | 27 ++++++++++++++++++++++++--- 1 file changed, 24 insertions(+), 3 deletions(-) diff --git a/src/supervisor/Supervisor.cc b/src/supervisor/Supervisor.cc index 22de934495..4d971a1068 100644 --- a/src/supervisor/Supervisor.cc +++ b/src/supervisor/Supervisor.cc @@ -458,6 +458,13 @@ void Stem::Reap() bool Stem::Wait(Supervisor::Node* node, int options) const { + if ( node->pid <= 0 ) + { + DBG_STEM("Stem skip waiting for node '%s' (PID %d) to terminate: already dead", + node->Name().data(), node->pid); + return true; + } + int status; auto res = waitpid(node->pid, &status, options); @@ -502,6 +509,13 @@ bool Stem::Wait(Supervisor::Node* node, int options) const void Stem::KillNode(Supervisor::Node* node, int signal) const { + if ( node->pid <= 0 ) + { + DBG_STEM("Stem skip killing node '%s' (PID %d): already dead", + node->Name().data(), node->pid); + return; + } + node->killed = true; auto kill_res = kill(node->pid, signal); @@ -516,6 +530,13 @@ void Stem::Destroy(Supervisor::Node* node) const constexpr auto kill_delay = 2; auto kill_attempts = 0; + if ( node->pid <= 0 ) + { + DBG_STEM("Stem skip killing/waiting node '%s' (PID %d): already dead", + node->Name().data(), node->pid); + return; + } + for ( ; ; ) { auto sig = kill_attempts++ < max_term_attempts ? 
SIGTERM : SIGKILL; @@ -626,6 +647,7 @@ void Stem::KillNodes(int signal) void Stem::Shutdown(int exit_code) { + DBG_STEM("Stem shutting down with exit code %d", exit_code); shutting_down = true; constexpr auto max_term_attempts = 13; constexpr auto kill_delay = 2; @@ -830,9 +852,8 @@ std::optional Stem::Poll() else if ( cmd == "destroy" ) { auto it = nodes.find(node_name); - assert(it != nodes.end()); auto& node = it->second; - DBG_STEM("Stem destroying node: %s", node_name.data()); + DBG_STEM("Stem destroying node: %s (PID %d)", node_name.data(), node.pid); Destroy(&node); nodes.erase(it); } @@ -841,7 +862,7 @@ std::optional Stem::Poll() auto it = nodes.find(node_name); assert(it != nodes.end()); auto& node = it->second; - DBG_STEM("Stem restarting node: %s", node_name.data()); + DBG_STEM("Stem restarting node: %s (PID %d)", node_name.data(), node.pid); Destroy(&node); auto sn = Spawn(&node); From bbdf5f8938bdc817b90188e943d7c12c70954c7d Mon Sep 17 00:00:00 2001 From: Jon Siwek Date: Wed, 22 Jan 2020 13:28:20 -0800 Subject: [PATCH 41/76] Move supervisor control events into SupervisorControl namespace --- doc | 2 +- .../base/frameworks/supervisor/__load__.zeek | 1 + scripts/base/frameworks/supervisor/api.zeek | 72 ----------- .../base/frameworks/supervisor/control.zeek | 86 +++++++++++++ scripts/base/frameworks/supervisor/main.zeek | 116 +++++++++--------- .../core.check-unused-event-handlers/.stderr | 10 +- .../canonified_loaded_scripts.log | 1 + .../canonified_loaded_scripts.log | 1 + testing/btest/Baseline/plugins.hooks/output | 17 +-- 9 files changed, 166 insertions(+), 140 deletions(-) create mode 100644 scripts/base/frameworks/supervisor/control.zeek diff --git a/doc b/doc index 0cb30512c5..bcbcf4f766 160000 --- a/doc +++ b/doc @@ -1 +1 @@ -Subproject commit 0cb30512c52990fcdb1e93b5219f65c9b3d18dce +Subproject commit bcbcf4f7663088e017101df4cab5ebdb35ce4d09 diff --git a/scripts/base/frameworks/supervisor/__load__.zeek 
b/scripts/base/frameworks/supervisor/__load__.zeek index e3034f5f0b..97de26a373 100644 --- a/scripts/base/frameworks/supervisor/__load__.zeek +++ b/scripts/base/frameworks/supervisor/__load__.zeek @@ -1,2 +1,3 @@ @load ./api +@load ./control @load ./main diff --git a/scripts/base/frameworks/supervisor/api.zeek b/scripts/base/frameworks/supervisor/api.zeek index 767bd8803f..814a8da5f3 100644 --- a/scripts/base/frameworks/supervisor/api.zeek +++ b/scripts/base/frameworks/supervisor/api.zeek @@ -114,76 +114,4 @@ export { ## It's an error to call this function from a process other than ## a supervised one. global node: function(): NodeConfig; - - ## Send a request to a remote Supervisor process to create a node. - ## - ## reqid: an arbitrary string that will be directly echoed in the response - ## - ## node: the desired configuration for the new supervised node process. - global Supervisor::create_request: event(reqid: string, node: NodeConfig); - - ## Handle a response from a Supervisor process that received - ## :zeek:see:`Supervisor::create_request`. - ## - ## reqid: an arbitrary string matching the value in the original request. - ## - ## result: the return value of the remote call to - ## :zeek:see:`Supervisor::create`. - global Supervisor::create_response: event(reqid: string, result: string); - - ## Send a request to a remote Supervisor process to retrieve node status. - ## - ## reqid: an arbitrary string that will be directly echoed in the response - ## - ## node: the name of the node to get status of or empty string to mean "all - ## nodes". - global Supervisor::status_request: event(reqid: string, node: string); - - ## Handle a response from a Supervisor process that received - ## :zeek:see:`Supervisor::status_request`. - ## - ## reqid: an arbitrary string matching the value in the original request. - ## - ## result: the return value of the remote call to - ## :zeek:see:`Supervisor::status`. 
- global Supervisor::status_response: event(reqid: string, result: Status); - - ## Send a request to a remote Supervisor process to restart a node. - ## - ## reqid: an arbitrary string that will be directly echoed in the response - ## - ## node: the name of the node to restart or empty string to mean "all - ## nodes". - global Supervisor::restart_request: event(reqid: string, node: string); - - ## Handle a response from a Supervisor process that received - ## :zeek:see:`Supervisor::restart_request`. - ## - ## reqid: an arbitrary string matching the value in the original request. - ## - ## result: the return value of the remote call to - ## :zeek:see:`Supervisor::restart`. - global Supervisor::restart_response: event(reqid: string, result: bool); - - ## Send a request to a remote Supervisor process to destroy a node. - ## - ## reqid: an arbitrary string that will be directly echoed in the response - ## - ## node: the name of the node to destory or empty string to mean "all - ## nodes". - global Supervisor::destroy_request: event(reqid: string, node: string); - - ## Handle a response from a Supervisor process that received - ## :zeek:see:`Supervisor::destroy_request`. - ## - ## reqid: an arbitrary string matching the value in the original request. - ## - ## result: the return value of the remote call to - ## :zeek:see:`Supervisor::destroy`. - global Supervisor::destroy_response: event(reqid: string, result: bool); - - ## Send a request to a remote Supervisor to stop and shutdown its - ## process tree. There is no response to this message as the Supervisor - ## simply terminates on receipt. - global Supervisor::stop_request: event(); } diff --git a/scripts/base/frameworks/supervisor/control.zeek b/scripts/base/frameworks/supervisor/control.zeek new file mode 100644 index 0000000000..7b3b4a8e5c --- /dev/null +++ b/scripts/base/frameworks/supervisor/control.zeek @@ -0,0 +1,86 @@ +##! The Zeek process supervision (remote) control API. This defines a Broker topic +##! 
prefix and events that can be used to control an external Zeek supervisor process. + +@load ./api + +module SupervisorControl; + +export { + ## The Broker topic prefix to use when subscribing to Supervisor API + ## requests and when publishing Supervisor API responses. If you are + ## publishing Supervisor requests, this is also the prefix string to use + ## for their topic names. + const topic_prefix = "zeek/supervisor" &redef; + + ## Send a request to a remote Supervisor process to create a node. + ## + ## reqid: an arbitrary string that will be directly echoed in the response + ## + ## node: the desired configuration for the new supervised node process. + global SupervisorControl::create_request: event(reqid: string, node: Supervisor::NodeConfig); + + ## Handle a response from a Supervisor process that received + ## :zeek:see:`SupervisorControl::create_request`. + ## + ## reqid: an arbitrary string matching the value in the original request. + ## + ## result: the return value of the remote call to + ## :zeek:see:`Supervisor::create`. + global SupervisorControl::create_response: event(reqid: string, result: string); + + ## Send a request to a remote Supervisor process to retrieve node status. + ## + ## reqid: an arbitrary string that will be directly echoed in the response + ## + ## node: the name of the node to get status of or empty string to mean "all + ## nodes". + global SupervisorControl::status_request: event(reqid: string, node: string); + + ## Handle a response from a Supervisor process that received + ## :zeek:see:`SupervisorControl::status_request`. + ## + ## reqid: an arbitrary string matching the value in the original request. + ## + ## result: the return value of the remote call to + ## :zeek:see:`Supervisor::status`. + global SupervisorControl::status_response: event(reqid: string, result: Supervisor::Status); + + ## Send a request to a remote Supervisor process to restart a node. 
+ ## + ## reqid: an arbitrary string that will be directly echoed in the response + ## + ## node: the name of the node to restart or empty string to mean "all + ## nodes". + global SupervisorControl::restart_request: event(reqid: string, node: string); + + ## Handle a response from a Supervisor process that received + ## :zeek:see:`SupervisorControl::restart_request`. + ## + ## reqid: an arbitrary string matching the value in the original request. + ## + ## result: the return value of the remote call to + ## :zeek:see:`Supervisor::restart`. + global SupervisorControl::restart_response: event(reqid: string, result: bool); + + ## Send a request to a remote Supervisor process to destroy a node. + ## + ## reqid: an arbitrary string that will be directly echoed in the response + ## + ## node: the name of the node to destory or empty string to mean "all + ## nodes". + global SupervisorControl::destroy_request: event(reqid: string, node: string); + + ## Handle a response from a Supervisor process that received + ## :zeek:see:`SupervisorControl::destroy_request`. + ## + ## reqid: an arbitrary string matching the value in the original request. + ## + ## result: the return value of the remote call to + ## :zeek:see:`Supervisor::destroy`. + global SupervisorControl::destroy_response: event(reqid: string, result: bool); + + ## Send a request to a remote Supervisor to stop and shutdown its + ## process tree. There is no response to this message as the Supervisor + ## simply terminates on receipt. + global SupervisorControl::stop_request: event(); +} diff --git a/scripts/base/frameworks/supervisor/main.zeek b/scripts/base/frameworks/supervisor/main.zeek index 5a7a163df0..f892907055 100644 --- a/scripts/base/frameworks/supervisor/main.zeek +++ b/scripts/base/frameworks/supervisor/main.zeek @@ -1,63 +1,16 @@ -##! Implements Zeek process supervision configuration options and default -##! behavior. +##! Implements Zeek process supervision API and default behavior for its +##! 
associated (remote) control events. @load ./api +@load ./control @load base/frameworks/broker -module Supervisor; - -export { - ## The Broker topic prefix to use when subscribing to Supervisor API - ## requests and when publishing Supervisor API responses. If you are - ## publishing Supervisor requests, this is also the prefix string to use - ## for their topic names. - const topic_prefix = "zeek/supervisor" &redef; -} - -event zeek_init() &priority=10 - { - Broker::subscribe(Supervisor::topic_prefix); - } - -event Supervisor::stop_request() - { - terminate(); - } - -event Supervisor::status_request(reqid: string, node: string) - { - local res = Supervisor::status(node); - local topic = Supervisor::topic_prefix + fmt("/status_response/%s", reqid); - Broker::publish(topic, Supervisor::status_response, reqid, res); - } - -event Supervisor::create_request(reqid: string, node: NodeConfig) - { - local res = Supervisor::create(node); - local topic = Supervisor::topic_prefix + fmt("/create_response/%s", reqid); - Broker::publish(topic, Supervisor::create_response, reqid, res); - } - -event Supervisor::destroy_request(reqid: string, node: string) - { - local res = Supervisor::destroy(node); - local topic = Supervisor::topic_prefix + fmt("/destroy_response/%s", reqid); - Broker::publish(topic, Supervisor::destroy_response, reqid, res); - } - -event Supervisor::restart_request(reqid: string, node: string) - { - local res = Supervisor::restart(node); - local topic = Supervisor::topic_prefix + fmt("/restart_response/%s", reqid); - Broker::publish(topic, Supervisor::restart_response, reqid, res); - } - -function Supervisor::status(node: string): Status +function Supervisor::status(node: string): Supervisor::Status { return Supervisor::__status(node); } -function Supervisor::create(node: NodeConfig): string +function Supervisor::create(node: Supervisor::NodeConfig): string { return Supervisor::__create(node); } @@ -72,17 +25,70 @@ function Supervisor::restart(node: string): bool 
return Supervisor::__restart(node); } -function is_supervisor(): bool +function Supervisor::is_supervisor(): bool { return Supervisor::__is_supervisor(); } -function is_supervised(): bool +function Supervisor::is_supervised(): bool { return Supervisor::__is_supervised(); } -function node(): NodeConfig +function Supervisor::node(): Supervisor::NodeConfig { return Supervisor::__node(); } + +event zeek_init() &priority=10 + { + Broker::subscribe(SupervisorControl::topic_prefix); + } + +event SupervisorControl::stop_request() + { + if ( ! Supervisor::is_supervisor() ) + return; + + terminate(); + } + +event SupervisorControl::status_request(reqid: string, node: string) + { + if ( ! Supervisor::is_supervisor() ) + return; + + local res = Supervisor::status(node); + local topic = SupervisorControl::topic_prefix + fmt("/status_response/%s", reqid); + Broker::publish(topic, SupervisorControl::status_response, reqid, res); + } + +event SupervisorControl::create_request(reqid: string, node: Supervisor::NodeConfig) + { + if ( ! Supervisor::is_supervisor() ) + return; + + local res = Supervisor::create(node); + local topic = SupervisorControl::topic_prefix + fmt("/create_response/%s", reqid); + Broker::publish(topic, SupervisorControl::create_response, reqid, res); + } + +event SupervisorControl::destroy_request(reqid: string, node: string) + { + if ( ! Supervisor::is_supervisor() ) + return; + + local res = Supervisor::destroy(node); + local topic = SupervisorControl::topic_prefix + fmt("/destroy_response/%s", reqid); + Broker::publish(topic, SupervisorControl::destroy_response, reqid, res); + } + +event SupervisorControl::restart_request(reqid: string, node: string) + { + if ( ! 
Supervisor::is_supervisor() ) + return; + + local res = Supervisor::restart(node); + local topic = SupervisorControl::topic_prefix + fmt("/restart_response/%s", reqid); + Broker::publish(topic, SupervisorControl::restart_response, reqid, res); + } diff --git a/testing/btest/Baseline/core.check-unused-event-handlers/.stderr b/testing/btest/Baseline/core.check-unused-event-handlers/.stderr index f7d99adbfe..56eb1756c4 100644 --- a/testing/btest/Baseline/core.check-unused-event-handlers/.stderr +++ b/testing/btest/Baseline/core.check-unused-event-handlers/.stderr @@ -1,8 +1,8 @@ warning in , line 1: event handler never invoked: InputConfig::new_value warning in , line 1: event handler never invoked: InputRaw::process_finished -warning in , line 1: event handler never invoked: Supervisor::create_request -warning in , line 1: event handler never invoked: Supervisor::destroy_request -warning in , line 1: event handler never invoked: Supervisor::restart_request -warning in , line 1: event handler never invoked: Supervisor::status_request -warning in , line 1: event handler never invoked: Supervisor::stop_request +warning in , line 1: event handler never invoked: SupervisorControl::create_request +warning in , line 1: event handler never invoked: SupervisorControl::destroy_request +warning in , line 1: event handler never invoked: SupervisorControl::restart_request +warning in , line 1: event handler never invoked: SupervisorControl::status_request +warning in , line 1: event handler never invoked: SupervisorControl::stop_request warning in , line 1: event handler never invoked: this_is_never_used diff --git a/testing/btest/Baseline/coverage.bare-load-baseline/canonified_loaded_scripts.log b/testing/btest/Baseline/coverage.bare-load-baseline/canonified_loaded_scripts.log index 583e4f8def..382bb30cb2 100644 --- a/testing/btest/Baseline/coverage.bare-load-baseline/canonified_loaded_scripts.log +++ 
b/testing/btest/Baseline/coverage.bare-load-baseline/canonified_loaded_scripts.log @@ -38,6 +38,7 @@ scripts/base/init-frameworks-and-bifs.zeek build/scripts/base/bif/store.bif.zeek scripts/base/frameworks/broker/log.zeek scripts/base/frameworks/supervisor/__load__.zeek + scripts/base/frameworks/supervisor/control.zeek scripts/base/frameworks/supervisor/main.zeek scripts/base/frameworks/input/__load__.zeek scripts/base/frameworks/input/main.zeek diff --git a/testing/btest/Baseline/coverage.default-load-baseline/canonified_loaded_scripts.log b/testing/btest/Baseline/coverage.default-load-baseline/canonified_loaded_scripts.log index 82b307c9d2..5d9fbfde6d 100644 --- a/testing/btest/Baseline/coverage.default-load-baseline/canonified_loaded_scripts.log +++ b/testing/btest/Baseline/coverage.default-load-baseline/canonified_loaded_scripts.log @@ -38,6 +38,7 @@ scripts/base/init-frameworks-and-bifs.zeek build/scripts/base/bif/store.bif.zeek scripts/base/frameworks/broker/log.zeek scripts/base/frameworks/supervisor/__load__.zeek + scripts/base/frameworks/supervisor/control.zeek scripts/base/frameworks/supervisor/main.zeek scripts/base/frameworks/input/__load__.zeek scripts/base/frameworks/input/main.zeek diff --git a/testing/btest/Baseline/plugins.hooks/output b/testing/btest/Baseline/plugins.hooks/output index 28cce57ab0..dde704c5af 100644 --- a/testing/btest/Baseline/plugins.hooks/output +++ b/testing/btest/Baseline/plugins.hooks/output @@ -276,7 +276,7 @@ 0.000000 MetaHookPost CallFunction(Log::__create_stream, , (Weird::LOG, [columns=Weird::Info, ev=Weird::log_weird, path=weird])) -> 0.000000 MetaHookPost CallFunction(Log::__create_stream, , (X509::LOG, [columns=X509::Info, ev=X509::log_x509, path=x509])) -> 0.000000 MetaHookPost CallFunction(Log::__create_stream, , (mysql::LOG, [columns=MySQL::Info, ev=MySQL::log_mysql, path=mysql])) -> -0.000000 MetaHookPost CallFunction(Log::__write, , (PacketFilter::LOG, [ts=1578437544.484903, node=zeek, filter=ip or not ip, 
init=T, success=T])) -> +0.000000 MetaHookPost CallFunction(Log::__write, , (PacketFilter::LOG, [ts=1579727603.636084, node=zeek, filter=ip or not ip, init=T, success=T])) -> 0.000000 MetaHookPost CallFunction(Log::add_default_filter, , (Broker::LOG)) -> 0.000000 MetaHookPost CallFunction(Log::add_default_filter, , (Cluster::LOG)) -> 0.000000 MetaHookPost CallFunction(Log::add_default_filter, , (Config::LOG)) -> @@ -457,7 +457,7 @@ 0.000000 MetaHookPost CallFunction(Log::create_stream, , (Weird::LOG, [columns=Weird::Info, ev=Weird::log_weird, path=weird])) -> 0.000000 MetaHookPost CallFunction(Log::create_stream, , (X509::LOG, [columns=X509::Info, ev=X509::log_x509, path=x509])) -> 0.000000 MetaHookPost CallFunction(Log::create_stream, , (mysql::LOG, [columns=MySQL::Info, ev=MySQL::log_mysql, path=mysql])) -> -0.000000 MetaHookPost CallFunction(Log::write, , (PacketFilter::LOG, [ts=1578437544.484903, node=zeek, filter=ip or not ip, init=T, success=T])) -> +0.000000 MetaHookPost CallFunction(Log::write, , (PacketFilter::LOG, [ts=1579727603.636084, node=zeek, filter=ip or not ip, init=T, success=T])) -> 0.000000 MetaHookPost CallFunction(NetControl::check_plugins, , ()) -> 0.000000 MetaHookPost CallFunction(NetControl::init, , ()) -> 0.000000 MetaHookPost CallFunction(Notice::want_pp, , ()) -> @@ -711,6 +711,7 @@ 0.000000 MetaHookPost LoadFile(0, .<...>/const.bif.zeek) -> -1 0.000000 MetaHookPost LoadFile(0, .<...>/consts.zeek) -> -1 0.000000 MetaHookPost LoadFile(0, .<...>/contents.zeek) -> -1 +0.000000 MetaHookPost LoadFile(0, .<...>/control.zeek) -> -1 0.000000 MetaHookPost LoadFile(0, .<...>/ct-list.zeek) -> -1 0.000000 MetaHookPost LoadFile(0, .<...>/data.bif.zeek) -> -1 0.000000 MetaHookPost LoadFile(0, .<...>/dcc-send.zeek) -> -1 @@ -1179,7 +1180,7 @@ 0.000000 MetaHookPre CallFunction(Log::__create_stream, , (Weird::LOG, [columns=Weird::Info, ev=Weird::log_weird, path=weird])) 0.000000 MetaHookPre CallFunction(Log::__create_stream, , (X509::LOG, 
[columns=X509::Info, ev=X509::log_x509, path=x509])) 0.000000 MetaHookPre CallFunction(Log::__create_stream, , (mysql::LOG, [columns=MySQL::Info, ev=MySQL::log_mysql, path=mysql])) -0.000000 MetaHookPre CallFunction(Log::__write, , (PacketFilter::LOG, [ts=1578437544.484903, node=zeek, filter=ip or not ip, init=T, success=T])) +0.000000 MetaHookPre CallFunction(Log::__write, , (PacketFilter::LOG, [ts=1579727603.636084, node=zeek, filter=ip or not ip, init=T, success=T])) 0.000000 MetaHookPre CallFunction(Log::add_default_filter, , (Broker::LOG)) 0.000000 MetaHookPre CallFunction(Log::add_default_filter, , (Cluster::LOG)) 0.000000 MetaHookPre CallFunction(Log::add_default_filter, , (Config::LOG)) @@ -1360,7 +1361,7 @@ 0.000000 MetaHookPre CallFunction(Log::create_stream, , (Weird::LOG, [columns=Weird::Info, ev=Weird::log_weird, path=weird])) 0.000000 MetaHookPre CallFunction(Log::create_stream, , (X509::LOG, [columns=X509::Info, ev=X509::log_x509, path=x509])) 0.000000 MetaHookPre CallFunction(Log::create_stream, , (mysql::LOG, [columns=MySQL::Info, ev=MySQL::log_mysql, path=mysql])) -0.000000 MetaHookPre CallFunction(Log::write, , (PacketFilter::LOG, [ts=1578437544.484903, node=zeek, filter=ip or not ip, init=T, success=T])) +0.000000 MetaHookPre CallFunction(Log::write, , (PacketFilter::LOG, [ts=1579727603.636084, node=zeek, filter=ip or not ip, init=T, success=T])) 0.000000 MetaHookPre CallFunction(NetControl::check_plugins, , ()) 0.000000 MetaHookPre CallFunction(NetControl::init, , ()) 0.000000 MetaHookPre CallFunction(Notice::want_pp, , ()) @@ -1614,6 +1615,7 @@ 0.000000 MetaHookPre LoadFile(0, .<...>/const.bif.zeek) 0.000000 MetaHookPre LoadFile(0, .<...>/consts.zeek) 0.000000 MetaHookPre LoadFile(0, .<...>/contents.zeek) +0.000000 MetaHookPre LoadFile(0, .<...>/control.zeek) 0.000000 MetaHookPre LoadFile(0, .<...>/ct-list.zeek) 0.000000 MetaHookPre LoadFile(0, .<...>/data.bif.zeek) 0.000000 MetaHookPre LoadFile(0, .<...>/dcc-send.zeek) @@ -2081,7 +2083,7 @@ 
0.000000 | HookCallFunction Log::__create_stream(Weird::LOG, [columns=Weird::Info, ev=Weird::log_weird, path=weird]) 0.000000 | HookCallFunction Log::__create_stream(X509::LOG, [columns=X509::Info, ev=X509::log_x509, path=x509]) 0.000000 | HookCallFunction Log::__create_stream(mysql::LOG, [columns=MySQL::Info, ev=MySQL::log_mysql, path=mysql]) -0.000000 | HookCallFunction Log::__write(PacketFilter::LOG, [ts=1578437544.484903, node=zeek, filter=ip or not ip, init=T, success=T]) +0.000000 | HookCallFunction Log::__write(PacketFilter::LOG, [ts=1579727603.636084, node=zeek, filter=ip or not ip, init=T, success=T]) 0.000000 | HookCallFunction Log::add_default_filter(Broker::LOG) 0.000000 | HookCallFunction Log::add_default_filter(Cluster::LOG) 0.000000 | HookCallFunction Log::add_default_filter(Config::LOG) @@ -2262,7 +2264,7 @@ 0.000000 | HookCallFunction Log::create_stream(Weird::LOG, [columns=Weird::Info, ev=Weird::log_weird, path=weird]) 0.000000 | HookCallFunction Log::create_stream(X509::LOG, [columns=X509::Info, ev=X509::log_x509, path=x509]) 0.000000 | HookCallFunction Log::create_stream(mysql::LOG, [columns=MySQL::Info, ev=MySQL::log_mysql, path=mysql]) -0.000000 | HookCallFunction Log::write(PacketFilter::LOG, [ts=1578437544.484903, node=zeek, filter=ip or not ip, init=T, success=T]) +0.000000 | HookCallFunction Log::write(PacketFilter::LOG, [ts=1579727603.636084, node=zeek, filter=ip or not ip, init=T, success=T]) 0.000000 | HookCallFunction NetControl::check_plugins() 0.000000 | HookCallFunction NetControl::init() 0.000000 | HookCallFunction Notice::want_pp() @@ -2518,6 +2520,7 @@ 0.000000 | HookLoadFile .<...>/const.bif.zeek 0.000000 | HookLoadFile .<...>/consts.zeek 0.000000 | HookLoadFile .<...>/contents.zeek +0.000000 | HookLoadFile .<...>/control.zeek 0.000000 | HookLoadFile .<...>/ct-list.zeek 0.000000 | HookLoadFile .<...>/data.bif.zeek 0.000000 | HookLoadFile .<...>/dcc-send.zeek @@ -2702,7 +2705,7 @@ 0.000000 | HookLoadFile base<...>/xmpp 0.000000 | 
HookLoadFile base<...>/zeek.bif.zeek 0.000000 | HookLogInit packet_filter 1/1 {ts (time), node (string), filter (string), init (bool), success (bool)} -0.000000 | HookLogWrite packet_filter [ts=1578437544.484903, node=zeek, filter=ip or not ip, init=T, success=T] +0.000000 | HookLogWrite packet_filter [ts=1579727603.636084, node=zeek, filter=ip or not ip, init=T, success=T] 0.000000 | HookQueueEvent NetControl::init() 0.000000 | HookQueueEvent filter_change_tracking() 0.000000 | HookQueueEvent zeek_init() From 7a748526c05f473dd9a1f03db14421c88bf16cb4 Mon Sep 17 00:00:00 2001 From: Jon Siwek Date: Wed, 22 Jan 2020 13:58:20 -0800 Subject: [PATCH 42/76] Use consistent zeek_init priority for Log::create_stream calls Typically in base scripts, Log::create_stream() is called in zeek_init() handler with &priority=5 such that it will have already been created in the default zeek_init() &priority=0. --- scripts/base/frameworks/signatures/main.zeek | 2 +- scripts/policy/files/x509/log-ocsp.zeek | 2 +- scripts/policy/protocols/conn/known-hosts.zeek | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/scripts/base/frameworks/signatures/main.zeek b/scripts/base/frameworks/signatures/main.zeek index eb2143d4ec..9a301d520a 100644 --- a/scripts/base/frameworks/signatures/main.zeek +++ b/scripts/base/frameworks/signatures/main.zeek @@ -140,7 +140,7 @@ global count_per_orig: table[addr, string] of count global did_sig_log: set[string] &read_expire = 1 hr; -event zeek_init() +event zeek_init() &priority=5 { Log::create_stream(Signatures::LOG, [$columns=Info, $ev=log_signature, $path="signatures"]); } diff --git a/scripts/policy/files/x509/log-ocsp.zeek b/scripts/policy/files/x509/log-ocsp.zeek index 8cc9d5aef3..a677b38055 100644 --- a/scripts/policy/files/x509/log-ocsp.zeek +++ b/scripts/policy/files/x509/log-ocsp.zeek @@ -39,7 +39,7 @@ export { global log_ocsp: event(rec: Info); } -event zeek_init() +event zeek_init() &priority=5 { Log::create_stream(LOG, 
[$columns=Info, $ev=log_ocsp, $path="ocsp"]); Files::register_for_mime_type(Files::ANALYZER_OCSP_REPLY, "application/ocsp-response"); diff --git a/scripts/policy/protocols/conn/known-hosts.zeek b/scripts/policy/protocols/conn/known-hosts.zeek index 8a3383e1b2..e95f0cec16 100644 --- a/scripts/policy/protocols/conn/known-hosts.zeek +++ b/scripts/policy/protocols/conn/known-hosts.zeek @@ -145,7 +145,7 @@ event Known::host_found(info: HostsInfo) event known_host_add(info); } -event zeek_init() +event zeek_init() &priority=5 { Log::create_stream(Known::HOSTS_LOG, [$columns=HostsInfo, $ev=log_known_hosts, $path="known_hosts"]); } From c75519ca8869448efc488598090f96d63352312f Mon Sep 17 00:00:00 2001 From: Jon Siwek Date: Wed, 22 Jan 2020 15:41:50 -0800 Subject: [PATCH 43/76] Improve creation of SMB transaction data strings --- .../protocol/smb/smb1-com-transaction.pac | 69 +++++++------------ 1 file changed, 23 insertions(+), 46 deletions(-) diff --git a/src/analyzer/protocol/smb/smb1-com-transaction.pac b/src/analyzer/protocol/smb/smb1-com-transaction.pac index 414066c6ba..0c2f128794 100644 --- a/src/analyzer/protocol/smb/smb1-com-transaction.pac +++ b/src/analyzer/protocol/smb/smb1-com-transaction.pac @@ -4,10 +4,29 @@ enum Trans_subcommands { NT_TRANSACT_CREATE2 = 0x0009, }; +%code{ + StringVal* SMB_Conn::transaction_data_to_val(SMB1_transaction_data* payload) + { + switch ( payload->trans_type() ) { + case SMB_PIPE: + return bytestring_to_val(payload->pipe_data()); + case SMB_UNKNOWN: + return bytestring_to_val(payload->unknown()); + default: + return bytestring_to_val(payload->data()); + } + + assert(false); + return val_mgr->GetEmptyString(); + } +%} + refine connection SMB_Conn += { %member{ map is_file_a_pipe; + + static StringVal* transaction_data_to_val(SMB1_transaction_data* payload); %} function get_is_file_a_pipe(id: uint16): bool @@ -37,32 +56,11 @@ refine connection SMB_Conn += { StringVal* parameters = new StringVal(${val.parameters}.length(), (const 
char*)${val.parameters}.data()); StringVal* payload_str = nullptr; - SMB1_transaction_data* payload = nullptr; if ( ${val.data_count} > 0 ) - { - payload = ${val.data}; - } - - if ( payload ) - { - switch ( payload->trans_type() ) { - case SMB_PIPE: - payload_str = new StringVal(${val.data_count}, (const char*)${val.data.pipe_data}.data()); - break; - case SMB_UNKNOWN: - payload_str = new StringVal(${val.data_count}, (const char*)${val.data.unknown}.data()); - break; - default: - payload_str = new StringVal(${val.data_count}, (const char*)${val.data.data}.data()); - break; - } - } - - if ( ! payload_str ) - { + payload_str = transaction_data_to_val(${val.data}); + else payload_str = val_mgr->GetEmptyString(); - } BifEvent::generate_smb1_transaction_request(bro_analyzer(), bro_analyzer()->Conn(), @@ -83,32 +81,11 @@ refine connection SMB_Conn += { StringVal* parameters = new StringVal(${val.parameters}.length(), (const char*)${val.parameters}.data()); StringVal* payload_str = nullptr; - SMB1_transaction_data* payload = nullptr; if ( ${val.data_count} > 0 ) - { - payload = ${val.data[0]}; - } - - if ( payload ) - { - switch ( payload->trans_type() ) { - case SMB_PIPE: - payload_str = new StringVal(${val.data_count}, (const char*)${val.data[0].pipe_data}.data()); - break; - case SMB_UNKNOWN: - payload_str = new StringVal(${val.data_count}, (const char*)${val.data[0].unknown}.data()); - break; - default: - payload_str = new StringVal(${val.data_count}, (const char*)${val.data[0].data}.data()); - break; - } - } - - if ( ! 
payload_str ) - { + payload_str = transaction_data_to_val(${val.data[0]}); + else payload_str = val_mgr->GetEmptyString(); - } BifEvent::generate_smb1_transaction_response(bro_analyzer(), bro_analyzer()->Conn(), From f939bcad7e433fe61c560a05bfc12731b08315d1 Mon Sep 17 00:00:00 2001 From: Jon Siwek Date: Wed, 22 Jan 2020 16:49:32 -0800 Subject: [PATCH 44/76] Skip file analysis for zero-length SSL/TLS data --- src/analyzer/protocol/ssl/proc-certificate.pac | 3 +++ src/analyzer/protocol/ssl/tls-handshake-analyzer.pac | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/src/analyzer/protocol/ssl/proc-certificate.pac b/src/analyzer/protocol/ssl/proc-certificate.pac index 7c57f31744..a9514c8c7c 100644 --- a/src/analyzer/protocol/ssl/proc-certificate.pac +++ b/src/analyzer/protocol/ssl/proc-certificate.pac @@ -16,6 +16,9 @@ { const bytestring& cert = (*certificates)[i]; + if ( cert.length() <= 0 ) + continue; + ODesc file_handle; file_handle.Add(common.Description()); file_handle.Add(i); diff --git a/src/analyzer/protocol/ssl/tls-handshake-analyzer.pac b/src/analyzer/protocol/ssl/tls-handshake-analyzer.pac index 5e8e31e0b2..9d5f3d8ea7 100644 --- a/src/analyzer/protocol/ssl/tls-handshake-analyzer.pac +++ b/src/analyzer/protocol/ssl/tls-handshake-analyzer.pac @@ -303,7 +303,7 @@ refine connection Handshake_Conn += { common.AddRaw("F"); bro_analyzer()->Conn()->IDString(&common); - if ( status_type == 1 ) // ocsp + if ( status_type == 1 && response.length() > 0 ) // ocsp { ODesc file_handle; file_handle.Add(common.Description()); From fce4bb3f5018d41dcbc9526820df682367059c67 Mon Sep 17 00:00:00 2001 From: Jon Siwek Date: Wed, 22 Jan 2020 19:50:14 -0800 Subject: [PATCH 45/76] Improve FTP word/whitespace handling --- src/analyzer/protocol/ftp/FTP.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/analyzer/protocol/ftp/FTP.cc b/src/analyzer/protocol/ftp/FTP.cc index 97ce54c481..da86ad7db4 100644 --- a/src/analyzer/protocol/ftp/FTP.cc +++ 
b/src/analyzer/protocol/ftp/FTP.cc @@ -83,7 +83,7 @@ void FTP_Analyzer::DeliverStream(int length, const u_char* data, bool orig) StringVal* cmd_str; line = skip_whitespace(line, end_of_line); - get_word(length, line, cmd_len, cmd); + get_word(end_of_line - line, line, cmd_len, cmd); line = skip_whitespace(line + cmd_len, end_of_line); if ( cmd_len == 0 ) From fc1c95725b72c18f7767ce0db99d01396c8e0bc4 Mon Sep 17 00:00:00 2001 From: Robin Sommer Date: Thu, 23 Jan 2020 13:50:42 +0000 Subject: [PATCH 46/76] Update Broker submodule. Includes a test baseline update for change in Broker error message. --- CHANGES | 4 ++++ VERSION | 2 +- aux/broker | 2 +- testing/btest/Baseline/broker.ssl_auth_failure/send.send.out | 2 +- 4 files changed, 7 insertions(+), 3 deletions(-) diff --git a/CHANGES b/CHANGES index 4e06e79462..9d4ca192ff 100644 --- a/CHANGES +++ b/CHANGES @@ -1,4 +1,8 @@ +3.1.0-dev.395 | 2020-01-23 13:50:42 +0000 + + * Update Broker submodule. + 3.1.0-dev.394 | 2020-01-23 13:18:54 +0000 * Improve creation of SMB transaction data strings. 
(Jon Siwek, diff --git a/VERSION b/VERSION index fdf57132c3..ef226cbe0f 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -3.1.0-dev.394 +3.1.0-dev.395 diff --git a/aux/broker b/aux/broker index 2854e3e308..a5a40fa7ef 160000 --- a/aux/broker +++ b/aux/broker @@ -1 +1 @@ -Subproject commit 2854e3e30810673fd6b9200321fa4ada364c559b +Subproject commit a5a40fa7ef1b386ece15c4a6a10994c511e655bd diff --git a/testing/btest/Baseline/broker.ssl_auth_failure/send.send.out b/testing/btest/Baseline/broker.ssl_auth_failure/send.send.out index 057a0f3d8e..4523e6a6c5 100644 --- a/testing/btest/Baseline/broker.ssl_auth_failure/send.send.out +++ b/testing/btest/Baseline/broker.ssl_auth_failure/send.send.out @@ -1 +1 @@ -sender error: code=Broker::PEER_UNAVAILABLE msg=(invalid-node, *, "remote endpoint unavailable") +sender error: code=Broker::PEER_UNAVAILABLE msg=(invalid-node, *, "remote endpoint unavailable: system_error(disconnect_during_handshake)") From f45c2cf06b4b6ed151158bac5b87c901cf29a7b9 Mon Sep 17 00:00:00 2001 From: Jon Siwek Date: Thu, 23 Jan 2020 18:08:43 -0800 Subject: [PATCH 47/76] Skip check for outdated docs in Cirrus CI for PRs --- CHANGES | 4 ++++ VERSION | 2 +- testing/btest/coverage/sphinx-zeekygen-docs.sh | 9 +++++++-- 3 files changed, 12 insertions(+), 3 deletions(-) diff --git a/CHANGES b/CHANGES index dba39d1730..c75e5db7ca 100644 --- a/CHANGES +++ b/CHANGES @@ -1,4 +1,8 @@ +3.1.0-dev.398 | 2020-01-23 18:08:43 -0800 + + * Skip check for outdated docs in Cirrus CI for PRs (Jon Siwek, Corelight) + 3.1.0-dev.397 | 2020-01-23 15:30:33 -0800 * Use consistent zeek_init priority for Log::create_stream calls diff --git a/VERSION b/VERSION index 28bd5611e8..d4ab5411c1 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -3.1.0-dev.397 +3.1.0-dev.398 diff --git a/testing/btest/coverage/sphinx-zeekygen-docs.sh b/testing/btest/coverage/sphinx-zeekygen-docs.sh index b5e3d7262c..20e18c2364 100644 --- a/testing/btest/coverage/sphinx-zeekygen-docs.sh +++ 
b/testing/btest/coverage/sphinx-zeekygen-docs.sh @@ -8,10 +8,15 @@ # @TEST-EXEC: bash $SCRIPTS/update-zeekygen-docs.sh ./doc # @TEST-EXEC: bash %INPUT +# This test isn't run on Travis or Cirrus CI for pull-requests. Instead, +# the person merging to master will manually update zeek-docs. + +if [ -n "$CIRRUS_PR" ]; then + exit 0 +fi + if [ -n "$TRAVIS_PULL_REQUEST" ]; then if [ "$TRAVIS_PULL_REQUEST" != "false" ]; then - # Don't run this test on Travis for pull-requests, just let someone - # manually update zeek-docs for things when merging to master. exit 0 fi fi From cc8d28089b8fc5a93e31060dddcb7dcf35deeab4 Mon Sep 17 00:00:00 2001 From: Jon Siwek Date: Fri, 24 Jan 2020 13:18:33 -0800 Subject: [PATCH 48/76] Updating submodule(s). [nomail] --- aux/broker | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aux/broker b/aux/broker index a5a40fa7ef..89a43997d6 160000 --- a/aux/broker +++ b/aux/broker @@ -1 +1 @@ -Subproject commit a5a40fa7ef1b386ece15c4a6a10994c511e655bd +Subproject commit 89a43997d6715fca4e2242a3b88b3aa1c075a340 From 440468a0dc885f550469507ba6c9b052b65dea5d Mon Sep 17 00:00:00 2001 From: Frerich Raabe Date: Fri, 24 Jan 2020 22:20:25 +0100 Subject: [PATCH 49/76] Adding test for decompose_uri parsing URI with empty port I'd expect the portnum component to remain uninitialized in case the given URI specifies no port number -- however, decompose_uri() raises an error instead. Will address this in a subsequent commit. 
--- testing/btest/Baseline/scripts.base.utils.urls/output | 1 + testing/btest/scripts/base/utils/urls.test | 1 + 2 files changed, 2 insertions(+) diff --git a/testing/btest/Baseline/scripts.base.utils.urls/output b/testing/btest/Baseline/scripts.base.utils.urls/output index 2d8f5b2c4d..e10010ab10 100644 --- a/testing/btest/Baseline/scripts.base.utils.urls/output +++ b/testing/btest/Baseline/scripts.base.utils.urls/output @@ -6,6 +6,7 @@ [scheme=http, netlocation=hyphen-example.com, portnum=, path=/index.asp, file_name=index.asp, file_base=index, file_ext=asp, params={ [q] = 123 }] +[scheme=git, netlocation=git.kernel.org, portnum=, path=/pub/scm/linux/, file_name=, file_base=, file_ext=, params=] [scheme=, netlocation=dfasjdfasdfasdf, portnum=, path=/, file_name=, file_base=, file_ext=, params={ }] diff --git a/testing/btest/scripts/base/utils/urls.test b/testing/btest/scripts/base/utils/urls.test index 666f805edb..c307ee601e 100644 --- a/testing/btest/scripts/base/utils/urls.test +++ b/testing/btest/scripts/base/utils/urls.test @@ -8,6 +8,7 @@ print decompose_uri("https://www.example.com/"); print decompose_uri("http://example.com:99/test//?foo=bar"); print decompose_uri("ftp://1.2.3.4/pub/files/something.exe"); print decompose_uri("http://hyphen-example.com/index.asp?q=123"); +print decompose_uri("git://git.kernel.org:/pub/scm/linux/"); # This is mostly undefined behavior but it doesn't give any # reporter messages at least. From bb879fc2aac5d6fa60fbe6b598a2efb92eec8e80 Mon Sep 17 00:00:00 2001 From: Frerich Raabe Date: Fri, 24 Jan 2020 22:08:42 +0100 Subject: [PATCH 50/76] Fixed decompose_uri choking on URIs with empty port component A call such as decompose_uri("git://git.kernel.org:/pub/scm/linux/"); would raise an error along the lines of error in /usr/local/zeek-3.0.0/share/zeek/base/utils/urls.zeek, line 122: bad conversion to count (to_count(parts[1]) and ) This was because an empty string got passed to the to_count() function.
Let's improve the behaviour and rather consider the portnum component of the URI to be uninitialized. --- scripts/base/utils/urls.zeek | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/scripts/base/utils/urls.zeek b/scripts/base/utils/urls.zeek index c6ec41cbfc..c8077f5424 100644 --- a/scripts/base/utils/urls.zeek +++ b/scripts/base/utils/urls.zeek @@ -119,7 +119,10 @@ function decompose_uri(uri: string): URI # Parse location and port. parts = split_string1(s, /:/); u$netlocation = parts[0]; - u$portnum = to_count(parts[1]); + if ( parts[1] != "" ) + { + u$portnum = to_count(parts[1]); + } } else { From 4955356a52b6418f1297104bc14f32a3f0c8c81c Mon Sep 17 00:00:00 2001 From: Frerich Raabe Date: Fri, 24 Jan 2020 22:19:04 +0100 Subject: [PATCH 51/76] Document recent fix for decompose_uri in release notes --- NEWS | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/NEWS b/NEWS index 772886070f..208fb89d5b 100644 --- a/NEWS +++ b/NEWS @@ -67,6 +67,11 @@ Changed Functionality previously used. Output from the formatters remains nearly identical. +- The ``decompose_uri`` function no longer raises an error when parsing + URIs with an empty port number (e.g. ``http://example.org:/``). Instead, + the ``portnum`` component of the returned ``URI`` value is left + uninitialized. 
+ Removed Functionality --------------------- From 5fb01caee613bd64f874277001331e6f7111c61e Mon Sep 17 00:00:00 2001 From: Jon Siwek Date: Mon, 27 Jan 2020 10:58:40 -0800 Subject: [PATCH 52/76] Add btests for supervisor stem/leaf process revival --- src/supervisor/supervisor.bif | 12 ++++ .../supervisor.revive-leaf/zeek.node.out | 2 + .../zeek.supervisor.out | 7 ++ .../supervisor.revive-stem/zeek.node.out | 2 + .../zeek.supervisor.out | 7 ++ testing/btest/supervisor/revive-leaf.zeek | 70 +++++++++++++++++++ testing/btest/supervisor/revive-stem.zeek | 65 +++++++++++++++++ 7 files changed, 165 insertions(+) create mode 100644 testing/btest/Baseline/supervisor.revive-leaf/zeek.node.out create mode 100644 testing/btest/Baseline/supervisor.revive-leaf/zeek.supervisor.out create mode 100644 testing/btest/Baseline/supervisor.revive-stem/zeek.node.out create mode 100644 testing/btest/Baseline/supervisor.revive-stem/zeek.supervisor.out create mode 100644 testing/btest/supervisor/revive-leaf.zeek create mode 100644 testing/btest/supervisor/revive-stem.zeek diff --git a/src/supervisor/supervisor.bif b/src/supervisor/supervisor.bif index b97c562deb..6bc6db17d6 100644 --- a/src/supervisor/supervisor.bif +++ b/src/supervisor/supervisor.bif @@ -98,3 +98,15 @@ function Supervisor::__is_supervisor%(%): bool %{ return val_mgr->GetBool(zeek::supervisor_mgr != nullptr); %} + +function Supervisor::__stem_pid%(%): int + %{ + if ( zeek::supervisor_mgr ) + return val_mgr->GetInt(zeek::supervisor_mgr->StemPID()); + + if ( zeek::Supervisor::ThisNode() ) + return val_mgr->GetInt(zeek::Supervisor::ThisNode()->parent_pid); + + builtin_error("supervisor mode not enabled and not a supervised node"); + return val_mgr->GetInt(-1); + %} diff --git a/testing/btest/Baseline/supervisor.revive-leaf/zeek.node.out b/testing/btest/Baseline/supervisor.revive-leaf/zeek.node.out new file mode 100644 index 0000000000..101b306cf0 --- /dev/null +++ b/testing/btest/Baseline/supervisor.revive-leaf/zeek.node.out 
@@ -0,0 +1,2 @@ +supervised node zeek_init() +supervised node zeek_done() diff --git a/testing/btest/Baseline/supervisor.revive-leaf/zeek.supervisor.out b/testing/btest/Baseline/supervisor.revive-leaf/zeek.supervisor.out new file mode 100644 index 0000000000..7e3ffd50b4 --- /dev/null +++ b/testing/btest/Baseline/supervisor.revive-leaf/zeek.supervisor.out @@ -0,0 +1,7 @@ +supervisor zeek_init() +supervisor connected to peer +supervisor lost peer +supervisor connected to peer +supervisor lost peer +supervisor connected to peer +supervisor zeek_done() diff --git a/testing/btest/Baseline/supervisor.revive-stem/zeek.node.out b/testing/btest/Baseline/supervisor.revive-stem/zeek.node.out new file mode 100644 index 0000000000..101b306cf0 --- /dev/null +++ b/testing/btest/Baseline/supervisor.revive-stem/zeek.node.out @@ -0,0 +1,2 @@ +supervised node zeek_init() +supervised node zeek_done() diff --git a/testing/btest/Baseline/supervisor.revive-stem/zeek.supervisor.out b/testing/btest/Baseline/supervisor.revive-stem/zeek.supervisor.out new file mode 100644 index 0000000000..7e3ffd50b4 --- /dev/null +++ b/testing/btest/Baseline/supervisor.revive-stem/zeek.supervisor.out @@ -0,0 +1,7 @@ +supervisor zeek_init() +supervisor connected to peer +supervisor lost peer +supervisor connected to peer +supervisor lost peer +supervisor connected to peer +supervisor zeek_done() diff --git a/testing/btest/supervisor/revive-leaf.zeek b/testing/btest/supervisor/revive-leaf.zeek new file mode 100644 index 0000000000..448970283b --- /dev/null +++ b/testing/btest/supervisor/revive-leaf.zeek @@ -0,0 +1,70 @@ +# @TEST-PORT: BROKER_PORT +# @TEST-EXEC: btest-bg-run zeek zeek -j -b %INPUT +# @TEST-EXEC: btest-bg-wait 20 +# @TEST-EXEC: btest-diff zeek/supervisor.out +# @TEST-EXEC: btest-diff zeek/node.out + +# So the supervised node doesn't terminate right away. 
+redef exit_only_after_terminate=T; + +global supervisor_output_file: file; +global node_output_file: file; +global topic = "test-topic"; +global peers_added = 0; + +event kill_self() + { + system(fmt("kill %s", getpid())); + } + +event zeek_init() + { + if ( Supervisor::is_supervisor() ) + { + Broker::subscribe(topic); + Broker::listen("127.0.0.1", to_port(getenv("BROKER_PORT"))); + supervisor_output_file = open("supervisor.out"); + print supervisor_output_file, "supervisor zeek_init()"; + local sn = Supervisor::NodeConfig($name="grault"); + local res = Supervisor::create(sn); + + if ( res != "" ) + print supervisor_output_file, res; + } + else + { + Broker::subscribe(topic); + Broker::peer("127.0.0.1", to_port(getenv("BROKER_PORT"))); + node_output_file = open("node.out"); + print node_output_file, "supervised node zeek_init()"; + } + } + +event Broker::peer_added(endpoint: Broker::EndpointInfo, msg: string) + { + ++peers_added; + + if ( Supervisor::is_supervisor() ) + { + print supervisor_output_file, "supervisor connected to peer"; + + if ( peers_added == 3 ) + terminate(); + else + Broker::publish(topic, kill_self); + } + } + +event Broker::peer_lost(endpoint: Broker::EndpointInfo, msg: string) + { + if ( Supervisor::is_supervisor() ) + print supervisor_output_file, "supervisor lost peer"; + } + +event zeek_done() + { + if ( Supervisor::is_supervisor() ) + print supervisor_output_file, "supervisor zeek_done()"; + else + print node_output_file, "supervised node zeek_done()"; + } diff --git a/testing/btest/supervisor/revive-stem.zeek b/testing/btest/supervisor/revive-stem.zeek new file mode 100644 index 0000000000..37dc3e5053 --- /dev/null +++ b/testing/btest/supervisor/revive-stem.zeek @@ -0,0 +1,65 @@ +# @TEST-PORT: BROKER_PORT +# @TEST-EXEC: btest-bg-run zeek zeek -j -b %INPUT +# @TEST-EXEC: btest-bg-wait 20 +# @TEST-EXEC: btest-diff zeek/supervisor.out +# @TEST-EXEC: btest-diff zeek/node.out + +# So the supervised node doesn't terminate right away. 
+redef exit_only_after_terminate=T; + +global supervisor_output_file: file; +global node_output_file: file; +global topic = "test-topic"; +global peers_added = 0; + +event zeek_init() + { + if ( Supervisor::is_supervisor() ) + { + Broker::subscribe(topic); + Broker::listen("127.0.0.1", to_port(getenv("BROKER_PORT"))); + supervisor_output_file = open("supervisor.out"); + print supervisor_output_file, "supervisor zeek_init()"; + local sn = Supervisor::NodeConfig($name="grault"); + local res = Supervisor::create(sn); + + if ( res != "" ) + print supervisor_output_file, res; + } + else + { + Broker::subscribe(topic); + Broker::peer("127.0.0.1", to_port(getenv("BROKER_PORT"))); + node_output_file = open("node.out"); + print node_output_file, "supervised node zeek_init()"; + } + } + +event Broker::peer_added(endpoint: Broker::EndpointInfo, msg: string) + { + ++peers_added; + + if ( Supervisor::is_supervisor() ) + { + print supervisor_output_file, "supervisor connected to peer"; + + if ( peers_added == 3 ) + terminate(); + else + system(fmt("kill %s", Supervisor::__stem_pid())); + } + } + +event Broker::peer_lost(endpoint: Broker::EndpointInfo, msg: string) + { + if ( Supervisor::is_supervisor() ) + print supervisor_output_file, "supervisor lost peer"; + } + +event zeek_done() + { + if ( Supervisor::is_supervisor() ) + print supervisor_output_file, "supervisor zeek_done()"; + else + print node_output_file, "supervised node zeek_done()"; + } From 53363a9bd30164d1e454768ce3e55cc9eebac2b7 Mon Sep 17 00:00:00 2001 From: Jon Siwek Date: Mon, 27 Jan 2020 11:24:37 -0800 Subject: [PATCH 53/76] Move command-line arg parsing functions to Options.{h,cc} --- src/Options.cc | 408 +++++++++++++++++++++++++++++++++++++++++++++++++ src/Options.h | 20 +++ src/main.cc | 406 +----------------------------------------------- 3 files changed, 431 insertions(+), 403 deletions(-) diff --git a/src/Options.cc b/src/Options.cc index 5c43b13477..6143657967 100644 --- a/src/Options.cc +++ 
b/src/Options.cc @@ -1,5 +1,16 @@ // See the file "COPYING" in the main distribution directory for copyright. +#include + +#include "zeek-config.h" + +#ifdef HAVE_GETOPT_H +#include +#endif + +#include "bsd-getopt-long.h" +#include "logging/writers/ascii/Ascii.h" + #include "Options.h" void zeek::Options::filter_supervisor_options() @@ -50,3 +61,400 @@ void zeek::Options::filter_supervised_node_options() scripts_to_load = og.scripts_to_load; script_options_to_set = og.script_options_to_set; } + +bool zeek::fake_dns() + { + return zeekenv("ZEEK_DNS_FAKE"); + } + +extern const char* zeek_version(); + +void zeek::usage(const char* prog, int code) + { + fprintf(stderr, "zeek version %s\n", zeek_version()); + + fprintf(stderr, "usage: %s [options] [file ...]\n", prog); + fprintf(stderr, "usage: %s --test [doctest-options] -- [options] [file ...]\n", prog); + fprintf(stderr, " | Zeek script file, or read stdin\n"); + fprintf(stderr, " -a|--parse-only | exit immediately after parsing scripts\n"); + fprintf(stderr, " -b|--bare-mode | don't load scripts from the base/ directory\n"); + fprintf(stderr, " -d|--debug-script | activate Zeek script debugging\n"); + fprintf(stderr, " -e|--exec | augment loaded scripts by given code\n"); + fprintf(stderr, " -f|--filter | tcpdump filter\n"); + fprintf(stderr, " -h|--help | command line help\n"); + fprintf(stderr, " -i|--iface | read from given interface\n"); + fprintf(stderr, " -p|--prefix | add given prefix to Zeek script file resolution\n"); + fprintf(stderr, " -r|--readfile | read from given tcpdump file\n"); + fprintf(stderr, " -s|--rulefile | read rules from given file\n"); + fprintf(stderr, " -t|--tracefile | activate execution tracing\n"); + fprintf(stderr, " -v|--version | print version and exit\n"); + fprintf(stderr, " -w|--writefile | write to given tcpdump file\n"); +#ifdef DEBUG + fprintf(stderr, " -B|--debug | Enable debugging output for selected streams ('-B help' for help)\n"); +#endif + fprintf(stderr, " 
-C|--no-checksums | ignore checksums\n"); + fprintf(stderr, " -F|--force-dns | force DNS\n"); + fprintf(stderr, " -G|--load-seeds | load seeds from given file\n"); + fprintf(stderr, " -H|--save-seeds | save seeds to given file\n"); + fprintf(stderr, " -I|--print-id | print out given ID\n"); + fprintf(stderr, " -N|--print-plugins | print available plugins and exit (-NN for verbose)\n"); + fprintf(stderr, " -P|--prime-dns | prime DNS\n"); + fprintf(stderr, " -Q|--time | print execution time summary to stderr\n"); + fprintf(stderr, " -S|--debug-rules | enable rule debugging\n"); + fprintf(stderr, " -T|--re-level | set 'RE_level' for rules\n"); + fprintf(stderr, " -U|--status-file | Record process status in file\n"); + fprintf(stderr, " -W|--watchdog | activate watchdog timer\n"); + fprintf(stderr, " -X|--zeekygen | generate documentation based on config file\n"); + +#ifdef USE_PERFTOOLS_DEBUG + fprintf(stderr, " -m|--mem-leaks | show leaks [perftools]\n"); + fprintf(stderr, " -M|--mem-profile | record heap [perftools]\n"); +#endif + fprintf(stderr, " --pseudo-realtime[=] | enable pseudo-realtime for performance evaluation (default 1)\n"); + fprintf(stderr, " -j|--jobs | enable supervisor mode\n"); + +#ifdef USE_IDMEF + fprintf(stderr, " -n|--idmef-dtd | specify path to IDMEF DTD file\n"); +#endif + + fprintf(stderr, " --test | run unit tests ('--test -h' for help, only when compiling with ENABLE_ZEEK_UNIT_TESTS)\n"); + fprintf(stderr, " $ZEEKPATH | file search path (%s)\n", bro_path().c_str()); + fprintf(stderr, " $ZEEK_PLUGIN_PATH | plugin search path (%s)\n", bro_plugin_path()); + fprintf(stderr, " $ZEEK_PLUGIN_ACTIVATE | plugins to always activate (%s)\n", bro_plugin_activate()); + fprintf(stderr, " $ZEEK_PREFIXES | prefix list (%s)\n", bro_prefixes().c_str()); + fprintf(stderr, " $ZEEK_DNS_FAKE | disable DNS lookups (%s)\n", zeek::fake_dns() ? 
"on" : "off"); + fprintf(stderr, " $ZEEK_SEED_FILE | file to load seeds from (not set)\n"); + fprintf(stderr, " $ZEEK_LOG_SUFFIX | ASCII log file extension (.%s)\n", logging::writer::Ascii::LogExt().c_str()); + fprintf(stderr, " $ZEEK_PROFILER_FILE | Output file for script execution statistics (not set)\n"); + fprintf(stderr, " $ZEEK_DISABLE_ZEEKYGEN | Disable Zeekygen documentation support (%s)\n", zeekenv("ZEEK_DISABLE_ZEEKYGEN") ? "set" : "not set"); + fprintf(stderr, " $ZEEK_DNS_RESOLVER | IPv4/IPv6 address of DNS resolver to use (%s)\n", zeekenv("ZEEK_DNS_RESOLVER") ? zeekenv("ZEEK_DNS_RESOLVER") : "not set, will use first IPv4 address from /etc/resolv.conf"); + fprintf(stderr, " $ZEEK_DEBUG_LOG_STDERR | Use stderr for debug logs generated via the -B flag"); + + fprintf(stderr, "\n"); + + exit(code); + } + +zeek::Options zeek::parse_cmdline(int argc, char** argv) + { + zeek::Options rval; + + // When running unit tests, the first argument on the command line must be + // --test, followed by doctest options. Optionally, users can use "--" as + // separator to pass Zeek options afterwards: + // + // zeek --test [doctest-options] -- [zeek-options] + + // Just locally filtering out the args for Zeek usage from doctest args. 
+ std::vector zeek_args; + + if ( argc > 1 && strcmp(argv[1], "--test") == 0 ) + { + #ifdef DOCTEST_CONFIG_DISABLE + fprintf(stderr, "ERROR: C++ unit tests are disabled for this build.\n" + " Please re-compile with ENABLE_ZEEK_UNIT_TESTS " + "to run the C++ unit tests.\n"); + usage(argv[0], 1); + #endif + + auto is_separator = [](const char* cstr) + { + return strcmp(cstr, "--") == 0; + }; + auto first = argv; + auto last = argv + argc; + auto separator = std::find_if(first, last, is_separator); + zeek_args.emplace_back(argv[0]); + + if ( separator != last ) + { + auto first_zeek_arg = std::next(separator); + + for ( auto i = first_zeek_arg; i != last; ++i ) + zeek_args.emplace_back(*i); + } + + rval.run_unit_tests = true; + + for ( auto i = 0; i < std::distance(first, separator); ++i ) + rval.doctest_args.emplace_back(argv[i]); + } + else + { + for ( auto i = 0; i < argc; ++i ) + zeek_args.emplace_back(argv[i]); + } + + constexpr struct option long_opts[] = { + {"parse-only", no_argument, 0, 'a'}, + {"bare-mode", no_argument, 0, 'b'}, + {"debug-script", no_argument, 0, 'd'}, + {"exec", required_argument, 0, 'e'}, + {"filter", required_argument, 0, 'f'}, + {"help", no_argument, 0, 'h'}, + {"iface", required_argument, 0, 'i'}, + {"zeekygen", required_argument, 0, 'X'}, + {"prefix", required_argument, 0, 'p'}, + {"readfile", required_argument, 0, 'r'}, + {"rulefile", required_argument, 0, 's'}, + {"tracefile", required_argument, 0, 't'}, + {"writefile", required_argument, 0, 'w'}, + {"version", no_argument, 0, 'v'}, + {"no-checksums", no_argument, 0, 'C'}, + {"force-dns", no_argument, 0, 'F'}, + {"load-seeds", required_argument, 0, 'G'}, + {"save-seeds", required_argument, 0, 'H'}, + {"print-plugins", no_argument, 0, 'N'}, + {"prime-dns", no_argument, 0, 'P'}, + {"time", no_argument, 0, 'Q'}, + {"debug-rules", no_argument, 0, 'S'}, + {"re-level", required_argument, 0, 'T'}, + {"watchdog", no_argument, 0, 'W'}, + {"print-id", required_argument, 0, 'I'}, + 
{"status-file", required_argument, 0, 'U'}, + +#ifdef DEBUG + {"debug", required_argument, 0, 'B'}, +#endif +#ifdef USE_IDMEF + {"idmef-dtd", required_argument, 0, 'n'}, +#endif +#ifdef USE_PERFTOOLS_DEBUG + {"mem-leaks", no_argument, 0, 'm'}, + {"mem-profile", no_argument, 0, 'M'}, +#endif + + {"pseudo-realtime", optional_argument, 0, 'E'}, + {"jobs", optional_argument, 0, 'j'}, + {"test", no_argument, 0, '#'}, + + {0, 0, 0, 0}, + }; + + char opts[256]; + safe_strncpy(opts, "B:e:f:G:H:I:i:j::n:p:r:s:T:t:U:w:X:CFNPQSWabdhv", + sizeof(opts)); + +#ifdef USE_PERFTOOLS_DEBUG + strncat(opts, "mM", 2); +#endif + + int op; + int long_optsind; + opterr = 0; + + // getopt may permute the array, so need yet another array + auto zargs = std::make_unique(zeek_args.size()); + + for ( auto i = 0u; i < zeek_args.size(); ++i ) + zargs[i] = zeek_args[i].data(); + + while ( (op = getopt_long(zeek_args.size(), zargs.get(), opts, long_opts, &long_optsind)) != EOF ) + switch ( op ) { + case 'a': + rval.parse_only = true; + break; + case 'b': + rval.bare_mode = true; + break; + case 'd': + rval.debug_scripts = true; + break; + case 'e': + rval.script_code_to_exec = optarg; + break; + case 'f': + rval.pcap_filter = optarg; + break; + case 'h': + rval.print_usage = true; + break; + case 'i': + if ( ! rval.pcap_files.empty() ) + { + fprintf(stderr, "Using -i is not allowed when reading pcap files"); + exit(1); + } + rval.interfaces.emplace_back(optarg); + break; + case 'j': + rval.supervisor_mode = true; + if ( optarg ) + { + // TODO: for supervised offline pcap reading, the argument is + // expected to be number of workers like "-j 4" or possibly a + // list of worker/proxy/logger counts like "-j 4,2,1" + } + break; + case 'p': + rval.script_prefixes.emplace_back(optarg); + break; + case 'r': + if ( ! 
rval.interfaces.empty() ) + { + fprintf(stderr, "Using -r is not allowed when reading a live interface"); + exit(1); + } + rval.pcap_files.emplace_back(optarg); + break; + case 's': + rval.signature_files.emplace_back(optarg); + break; + case 't': + rval.debug_script_tracing_file = optarg; + break; + case 'v': + rval.print_version = true; + break; + case 'w': + rval.pcap_output_file = optarg; + break; + case 'B': + rval.debug_log_streams = optarg; + break; + case 'C': + rval.ignore_checksums = true; + break; + case 'E': + rval.pseudo_realtime = 1.0; + if ( optarg ) + rval.pseudo_realtime = atof(optarg); + break; + case 'F': + if ( rval.dns_mode != DNS_DEFAULT ) + usage(zargs[0], 1); + rval.dns_mode = DNS_FORCE; + break; + case 'G': + rval.random_seed_input_file = optarg; + break; + case 'H': + rval.random_seed_output_file = optarg; + break; + case 'I': + rval.identifier_to_print = optarg; + break; + case 'N': + ++rval.print_plugins; + break; + case 'P': + if ( rval.dns_mode != DNS_DEFAULT ) + usage(zargs[0], 1); + rval.dns_mode = DNS_PRIME; + break; + case 'Q': + rval.print_execution_time = true; + break; + case 'S': + rval.print_signature_debug_info = true; + break; + case 'T': + rval.signature_re_level = atoi(optarg); + break; + case 'U': + rval.process_status_file = optarg; + break; + case 'W': + rval.use_watchdog = true; + break; + case 'X': + rval.zeekygen_config_file = optarg; + break; + +#ifdef USE_PERFTOOLS_DEBUG + case 'm': + rval.perftools_check_leaks = 1; + break; + case 'M': + rval.perftools_profile = 1; + break; +#endif + +#ifdef USE_IDMEF + case 'n': + rval.libidmef_dtd_path = optarg; + break; +#endif + + case '#': + fprintf(stderr, "ERROR: --test only allowed as first argument.\n"); + usage(zargs[0], 1); + break; + + case 0: + // This happens for long options that don't have + // a short-option equivalent. + break; + + case '?': + default: + usage(zargs[0], 1); + break; + } + + // Process remaining arguments. 
X=Y arguments indicate script + // variable/parameter assignments. X::Y arguments indicate plugins to + // activate/query. The remainder are treated as scripts to load. + while ( optind < static_cast(zeek_args.size()) ) + { + if ( strchr(zargs[optind], '=') ) + rval.script_options_to_set.emplace_back(zargs[optind++]); + else if ( strstr(zargs[optind], "::") ) + rval.plugins_to_load.emplace(zargs[optind++]); + else + rval.scripts_to_load.emplace_back(zargs[optind++]); + } + + auto canonify_script_path = [](std::string* path) + { + if ( path->empty() ) + return; + + *path = normalize_path(*path); + + if ( (*path)[0] == '/' || (*path)[0] == '~' ) + // Absolute path + return; + + if ( (*path)[0] != '.' ) + { + // Look up file in ZEEKPATH + auto res = find_script_file(*path, bro_path()); + + if ( res.empty() ) + { + fprintf(stderr, "failed to locate script: %s\n", path->data()); + exit(1); + } + + *path = res; + + if ( (*path)[0] == '/' || (*path)[0] == '~' ) + // Now an absolute path + return; + } + + // Need to translate relative path to absolute. + char cwd[PATH_MAX]; + + if ( ! getcwd(cwd, sizeof(cwd)) ) + { + fprintf(stderr, "failed to get current directory: %s\n", + strerror(errno)); + exit(1); + } + + *path = std::string(cwd) + "/" + *path; + }; + + if ( rval.supervisor_mode ) + { + // Translate any relative paths supplied to supervisor into absolute + // paths for use by supervised nodes since they have the option to + // operate out of a different working directory. + for ( auto& s : rval.scripts_to_load ) + canonify_script_path(&s); + } + + return rval; + } diff --git a/src/Options.h b/src/Options.h index 72d0389c74..01711481d9 100644 --- a/src/Options.h +++ b/src/Options.h @@ -74,4 +74,24 @@ struct Options { std::vector script_options_to_set; }; +/** + * Parse Zeek command-line arguments. 
+ * @param argc argument count (same semantics as arguments to main()) + * @param argv argument strings (same semantics as arguments to main()) + * @return the parsed command-line options + */ +zeek::Options parse_cmdline(int argc, char** argv); + +/** + * Print command-line Zeek usage information and exit. + * @param prog the name/path of the Zeek command-line invocation + * @code the exit code to use + */ +void usage(const char* prog, int code = 1); + +/** + * @return true if zeek is running a "fake" DNS resolver, else false. + */ +bool fake_dns(); + } // namespace zeek diff --git a/src/main.cc b/src/main.cc index 98661df2ea..46618108c1 100644 --- a/src/main.cc +++ b/src/main.cc @@ -10,9 +10,6 @@ #include #include #include -#ifdef HAVE_GETOPT_H -#include -#endif #ifdef USE_IDMEF extern "C" { @@ -24,7 +21,6 @@ extern "C" { #include #include "Options.h" -#include "bsd-getopt-long.h" #include "input.h" #include "DNS_Mgr.h" #include "Frame.h" @@ -50,7 +46,6 @@ extern "C" { #include "threading/Manager.h" #include "input/Manager.h" #include "logging/Manager.h" -#include "logging/writers/ascii/Ascii.h" #include "input/readers/raw/Raw.h" #include "analyzer/Manager.h" #include "analyzer/Tag.h" @@ -148,77 +143,6 @@ const char* zeek_version() #endif } -static bool zeek_dns_fake() - { - return zeekenv("ZEEK_DNS_FAKE"); - } - -static void usage(const char* prog, int code = 1) - { - fprintf(stderr, "zeek version %s\n", zeek_version()); - - fprintf(stderr, "usage: %s [options] [file ...]\n", prog); - fprintf(stderr, "usage: %s --test [doctest-options] -- [options] [file ...]\n", prog); - fprintf(stderr, " | Zeek script file, or read stdin\n"); - fprintf(stderr, " -a|--parse-only | exit immediately after parsing scripts\n"); - fprintf(stderr, " -b|--bare-mode | don't load scripts from the base/ directory\n"); - fprintf(stderr, " -d|--debug-script | activate Zeek script debugging\n"); - fprintf(stderr, " -e|--exec | augment loaded scripts by given code\n"); - fprintf(stderr, " 
-f|--filter | tcpdump filter\n"); - fprintf(stderr, " -h|--help | command line help\n"); - fprintf(stderr, " -i|--iface | read from given interface\n"); - fprintf(stderr, " -p|--prefix | add given prefix to Zeek script file resolution\n"); - fprintf(stderr, " -r|--readfile | read from given tcpdump file\n"); - fprintf(stderr, " -s|--rulefile | read rules from given file\n"); - fprintf(stderr, " -t|--tracefile | activate execution tracing\n"); - fprintf(stderr, " -v|--version | print version and exit\n"); - fprintf(stderr, " -w|--writefile | write to given tcpdump file\n"); -#ifdef DEBUG - fprintf(stderr, " -B|--debug | Enable debugging output for selected streams ('-B help' for help)\n"); -#endif - fprintf(stderr, " -C|--no-checksums | ignore checksums\n"); - fprintf(stderr, " -F|--force-dns | force DNS\n"); - fprintf(stderr, " -G|--load-seeds | load seeds from given file\n"); - fprintf(stderr, " -H|--save-seeds | save seeds to given file\n"); - fprintf(stderr, " -I|--print-id | print out given ID\n"); - fprintf(stderr, " -N|--print-plugins | print available plugins and exit (-NN for verbose)\n"); - fprintf(stderr, " -P|--prime-dns | prime DNS\n"); - fprintf(stderr, " -Q|--time | print execution time summary to stderr\n"); - fprintf(stderr, " -S|--debug-rules | enable rule debugging\n"); - fprintf(stderr, " -T|--re-level | set 'RE_level' for rules\n"); - fprintf(stderr, " -U|--status-file | Record process status in file\n"); - fprintf(stderr, " -W|--watchdog | activate watchdog timer\n"); - fprintf(stderr, " -X|--zeekygen | generate documentation based on config file\n"); - -#ifdef USE_PERFTOOLS_DEBUG - fprintf(stderr, " -m|--mem-leaks | show leaks [perftools]\n"); - fprintf(stderr, " -M|--mem-profile | record heap [perftools]\n"); -#endif - fprintf(stderr, " --pseudo-realtime[=] | enable pseudo-realtime for performance evaluation (default 1)\n"); - fprintf(stderr, " -j|--jobs | enable supervisor mode\n"); - -#ifdef USE_IDMEF - fprintf(stderr, " -n|--idmef-dtd | 
specify path to IDMEF DTD file\n"); -#endif - - fprintf(stderr, " --test | run unit tests ('--test -h' for help, only when compiling with ENABLE_ZEEK_UNIT_TESTS)\n"); - fprintf(stderr, " $ZEEKPATH | file search path (%s)\n", bro_path().c_str()); - fprintf(stderr, " $ZEEK_PLUGIN_PATH | plugin search path (%s)\n", bro_plugin_path()); - fprintf(stderr, " $ZEEK_PLUGIN_ACTIVATE | plugins to always activate (%s)\n", bro_plugin_activate()); - fprintf(stderr, " $ZEEK_PREFIXES | prefix list (%s)\n", bro_prefixes().c_str()); - fprintf(stderr, " $ZEEK_DNS_FAKE | disable DNS lookups (%s)\n", zeek_dns_fake() ? "on" : "off"); - fprintf(stderr, " $ZEEK_SEED_FILE | file to load seeds from (not set)\n"); - fprintf(stderr, " $ZEEK_LOG_SUFFIX | ASCII log file extension (.%s)\n", logging::writer::Ascii::LogExt().c_str()); - fprintf(stderr, " $ZEEK_PROFILER_FILE | Output file for script execution statistics (not set)\n"); - fprintf(stderr, " $ZEEK_DISABLE_ZEEKYGEN | Disable Zeekygen documentation support (%s)\n", zeekenv("ZEEK_DISABLE_ZEEKYGEN") ? "set" : "not set"); - fprintf(stderr, " $ZEEK_DNS_RESOLVER | IPv4/IPv6 address of DNS resolver to use (%s)\n", zeekenv("ZEEK_DNS_RESOLVER") ? zeekenv("ZEEK_DNS_RESOLVER") : "not set, will use first IPv4 address from /etc/resolv.conf"); - fprintf(stderr, " $ZEEK_DEBUG_LOG_STDERR | Use stderr for debug logs generated via the -B flag"); - - fprintf(stderr, "\n"); - - exit(code); - } - static std::vector to_cargs(const std::vector& args) { std::vector rval; @@ -230,330 +154,6 @@ static std::vector to_cargs(const std::vector& args) return rval; } -static zeek::Options parse_cmdline(int argc, char** argv) - { - zeek::Options rval; - - // When running unit tests, the first argument on the command line must be - // --test, followed by doctest options. 
Optionally, users can use "--" as - // separator to pass Zeek options afterwards: - // - // zeek --test [doctest-options] -- [zeek-options] - - // Just locally filtering out the args for Zeek usage from doctest args. - std::vector zeek_args; - - if ( argc > 1 && strcmp(argv[1], "--test") == 0 ) - { - #ifdef DOCTEST_CONFIG_DISABLE - fprintf(stderr, "ERROR: C++ unit tests are disabled for this build.\n" - " Please re-compile with ENABLE_ZEEK_UNIT_TESTS " - "to run the C++ unit tests.\n"); - usage(argv[0], 1); - #endif - - auto is_separator = [](const char* cstr) - { - return strcmp(cstr, "--") == 0; - }; - auto first = argv; - auto last = argv + argc; - auto separator = std::find_if(first, last, is_separator); - zeek_args.emplace_back(argv[0]); - - if ( separator != last ) - { - auto first_zeek_arg = std::next(separator); - - for ( auto i = first_zeek_arg; i != last; ++i ) - zeek_args.emplace_back(*i); - } - - rval.run_unit_tests = true; - - for ( auto i = 0; i < std::distance(first, separator); ++i ) - rval.doctest_args.emplace_back(argv[i]); - } - else - { - for ( auto i = 0; i < argc; ++i ) - zeek_args.emplace_back(argv[i]); - } - - constexpr struct option long_opts[] = { - {"parse-only", no_argument, 0, 'a'}, - {"bare-mode", no_argument, 0, 'b'}, - {"debug-script", no_argument, 0, 'd'}, - {"exec", required_argument, 0, 'e'}, - {"filter", required_argument, 0, 'f'}, - {"help", no_argument, 0, 'h'}, - {"iface", required_argument, 0, 'i'}, - {"zeekygen", required_argument, 0, 'X'}, - {"prefix", required_argument, 0, 'p'}, - {"readfile", required_argument, 0, 'r'}, - {"rulefile", required_argument, 0, 's'}, - {"tracefile", required_argument, 0, 't'}, - {"writefile", required_argument, 0, 'w'}, - {"version", no_argument, 0, 'v'}, - {"no-checksums", no_argument, 0, 'C'}, - {"force-dns", no_argument, 0, 'F'}, - {"load-seeds", required_argument, 0, 'G'}, - {"save-seeds", required_argument, 0, 'H'}, - {"print-plugins", no_argument, 0, 'N'}, - {"prime-dns", no_argument, 0, 
'P'}, - {"time", no_argument, 0, 'Q'}, - {"debug-rules", no_argument, 0, 'S'}, - {"re-level", required_argument, 0, 'T'}, - {"watchdog", no_argument, 0, 'W'}, - {"print-id", required_argument, 0, 'I'}, - {"status-file", required_argument, 0, 'U'}, - -#ifdef DEBUG - {"debug", required_argument, 0, 'B'}, -#endif -#ifdef USE_IDMEF - {"idmef-dtd", required_argument, 0, 'n'}, -#endif -#ifdef USE_PERFTOOLS_DEBUG - {"mem-leaks", no_argument, 0, 'm'}, - {"mem-profile", no_argument, 0, 'M'}, -#endif - - {"pseudo-realtime", optional_argument, 0, 'E'}, - {"jobs", optional_argument, 0, 'j'}, - {"test", no_argument, 0, '#'}, - - {0, 0, 0, 0}, - }; - - char opts[256]; - safe_strncpy(opts, "B:e:f:G:H:I:i:j::n:p:r:s:T:t:U:w:X:CFNPQSWabdhv", - sizeof(opts)); - -#ifdef USE_PERFTOOLS_DEBUG - strncat(opts, "mM", 2); -#endif - - int op; - int long_optsind; - opterr = 0; - - // getopt may permute the array, so need yet another array - auto zargs = std::make_unique(zeek_args.size()); - - for ( auto i = 0u; i < zeek_args.size(); ++i ) - zargs[i] = zeek_args[i].data(); - - while ( (op = getopt_long(zeek_args.size(), zargs.get(), opts, long_opts, &long_optsind)) != EOF ) - switch ( op ) { - case 'a': - rval.parse_only = true; - break; - case 'b': - rval.bare_mode = true; - break; - case 'd': - rval.debug_scripts = true; - break; - case 'e': - rval.script_code_to_exec = optarg; - break; - case 'f': - rval.pcap_filter = optarg; - break; - case 'h': - rval.print_usage = true; - break; - case 'i': - if ( ! 
rval.pcap_files.empty() ) - { - fprintf(stderr, "Using -i is not allowed when reading pcap files"); - exit(1); - } - rval.interfaces.emplace_back(optarg); - break; - case 'j': - rval.supervisor_mode = true; - if ( optarg ) - { - // TODO: for supervised offline pcap reading, the argument is - // expected to be number of workers like "-j 4" or possibly a - // list of worker/proxy/logger counts like "-j 4,2,1" - } - break; - case 'p': - rval.script_prefixes.emplace_back(optarg); - break; - case 'r': - if ( ! rval.interfaces.empty() ) - { - fprintf(stderr, "Using -r is not allowed when reading a live interface"); - exit(1); - } - rval.pcap_files.emplace_back(optarg); - break; - case 's': - rval.signature_files.emplace_back(optarg); - break; - case 't': - rval.debug_script_tracing_file = optarg; - break; - case 'v': - rval.print_version = true; - break; - case 'w': - rval.pcap_output_file = optarg; - break; - case 'B': - rval.debug_log_streams = optarg; - break; - case 'C': - rval.ignore_checksums = true; - break; - case 'E': - rval.pseudo_realtime = 1.0; - if ( optarg ) - rval.pseudo_realtime = atof(optarg); - break; - case 'F': - if ( rval.dns_mode != DNS_DEFAULT ) - usage(zargs[0], 1); - rval.dns_mode = DNS_FORCE; - break; - case 'G': - rval.random_seed_input_file = optarg; - break; - case 'H': - rval.random_seed_output_file = optarg; - break; - case 'I': - rval.identifier_to_print = optarg; - break; - case 'N': - ++rval.print_plugins; - break; - case 'P': - if ( rval.dns_mode != DNS_DEFAULT ) - usage(zargs[0], 1); - rval.dns_mode = DNS_PRIME; - break; - case 'Q': - rval.print_execution_time = true; - break; - case 'S': - rval.print_signature_debug_info = true; - break; - case 'T': - rval.signature_re_level = atoi(optarg); - break; - case 'U': - rval.process_status_file = optarg; - break; - case 'W': - rval.use_watchdog = true; - break; - case 'X': - rval.zeekygen_config_file = optarg; - break; - -#ifdef USE_PERFTOOLS_DEBUG - case 'm': - rval.perftools_check_leaks = 
1; - break; - case 'M': - rval.perftools_profile = 1; - break; -#endif - -#ifdef USE_IDMEF - case 'n': - rval.libidmef_dtd_path = optarg; - break; -#endif - - case '#': - fprintf(stderr, "ERROR: --test only allowed as first argument.\n"); - usage(zargs[0], 1); - break; - - case 0: - // This happens for long options that don't have - // a short-option equivalent. - break; - - case '?': - default: - usage(zargs[0], 1); - break; - } - - // Process remaining arguments. X=Y arguments indicate script - // variable/parameter assignments. X::Y arguments indicate plugins to - // activate/query. The remainder are treated as scripts to load. - while ( optind < static_cast(zeek_args.size()) ) - { - if ( strchr(zargs[optind], '=') ) - rval.script_options_to_set.emplace_back(zargs[optind++]); - else if ( strstr(zargs[optind], "::") ) - rval.plugins_to_load.emplace(zargs[optind++]); - else - rval.scripts_to_load.emplace_back(zargs[optind++]); - } - - auto canonify_script_path = [](std::string* path) - { - if ( path->empty() ) - return; - - *path = normalize_path(*path); - - if ( (*path)[0] == '/' || (*path)[0] == '~' ) - // Absolute path - return; - - if ( (*path)[0] != '.' ) - { - // Look up file in ZEEKPATH - auto res = find_script_file(*path, bro_path()); - - if ( res.empty() ) - { - fprintf(stderr, "failed to locate script: %s\n", path->data()); - exit(1); - } - - *path = res; - - if ( (*path)[0] == '/' || (*path)[0] == '~' ) - // Now an absolute path - return; - } - - // Need to translate relative path to absolute. - char cwd[PATH_MAX]; - - if ( ! getcwd(cwd, sizeof(cwd)) ) - { - fprintf(stderr, "failed to get current directory: %s\n", - strerror(errno)); - exit(1); - } - - *path = std::string(cwd) + "/" + *path; - }; - - if ( rval.supervisor_mode ) - { - // Translate any relative paths supplied to supervisor into absolute - // paths for use by supervised nodes since they have the option to - // operate out of a different working directory. 
- for ( auto& s : rval.scripts_to_load ) - canonify_script_path(&s); - } - - return rval; - } - bool show_plugins(int level) { plugin::Manager::plugin_list plugins = plugin_mgr->ActivePlugins(); @@ -816,10 +416,10 @@ int main(int argc, char** argv) for ( int i = 0; i < argc; i++ ) bro_argv[i] = copy_string(argv[i]); - auto options = parse_cmdline(argc, argv); + auto options = zeek::parse_cmdline(argc, argv); if ( options.print_usage ) - usage(argv[0], 0); + zeek::usage(argv[0], 0); if ( options.print_version ) { @@ -847,7 +447,7 @@ int main(int argc, char** argv) auto dns_type = options.dns_mode; - if ( dns_type == DNS_DEFAULT && zeek_dns_fake() ) + if ( dns_type == DNS_DEFAULT && zeek::fake_dns() ) dns_type = DNS_FAKE; RETSIGTYPE (*oldhandler)(int); From fdfd729206f42b3a9f6f8339a903144193360d86 Mon Sep 17 00:00:00 2001 From: Jon Siwek Date: Mon, 27 Jan 2020 13:51:01 -0800 Subject: [PATCH 54/76] Add note that Supervisor script APIs are unstable until 4.0 --- doc | 2 +- scripts/base/frameworks/supervisor/api.zeek | 3 +++ scripts/base/frameworks/supervisor/control.zeek | 3 +++ 3 files changed, 7 insertions(+), 1 deletion(-) diff --git a/doc b/doc index bcbcf4f766..06cd3e817c 160000 --- a/doc +++ b/doc @@ -1 +1 @@ -Subproject commit bcbcf4f7663088e017101df4cab5ebdb35ce4d09 +Subproject commit 06cd3e817c4482ebee19bd825a0c871c98f08f8a diff --git a/scripts/base/frameworks/supervisor/api.zeek b/scripts/base/frameworks/supervisor/api.zeek index 814a8da5f3..7bd967c79e 100644 --- a/scripts/base/frameworks/supervisor/api.zeek +++ b/scripts/base/frameworks/supervisor/api.zeek @@ -1,4 +1,7 @@ ##! The Zeek process supervision API. +##! This API was introduced in Zeek 3.1.0 and considered unstable until 4.0.0. +##! That is, it may change in various incompatible ways without warning or +##! deprecation until the stable 4.0.0 release. 
module Supervisor; diff --git a/scripts/base/frameworks/supervisor/control.zeek b/scripts/base/frameworks/supervisor/control.zeek index 7b3b4a8e5c..fa20a9dba6 100644 --- a/scripts/base/frameworks/supervisor/control.zeek +++ b/scripts/base/frameworks/supervisor/control.zeek @@ -1,5 +1,8 @@ ##! The Zeek process supervision (remote) control API. This defines a Broker topic ##! prefix and events that can be used to control an external Zeek supervisor process. +##! This API was introduced in Zeek 3.1.0 and considered unstable until 4.0.0. +##! That is, it may change in various incompatible ways without warning or +##! deprecation until the stable 4.0.0 release. @load ./api From cb4258434ce6893a2384a960ef4d87f788a6ec7c Mon Sep 17 00:00:00 2001 From: Max Kellermann Date: Mon, 27 Jan 2020 22:16:11 +0100 Subject: [PATCH 55/76] util: use getrandom() on Linux if available Unlike /dev/urandom, getrandom() doesn't need a file descriptor and works when there is no /dev. It requires Linux 3.17 and glibc 2.25, but there is a fallback to the old code. For simplicity, this patch uses __has_include() to detect the availability of this API, but maybe we should move that to cmake. (It might be useful to refactor the whole random gathering code to a separate function.) --- src/util.cc | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/src/util.cc b/src/util.cc index 18a4defde5..df7b13f663 100644 --- a/src/util.cc +++ b/src/util.cc @@ -55,6 +55,14 @@ #include "3rdparty/doctest.h" +#if defined(__linux__) && __cplusplus >= 201703L +/* need C++17 for __has_include() */ +#if __has_include() +#define HAVE_GETRANDOM +#include +#endif +#endif + TEST_CASE("util extract_ip") { CHECK(extract_ip("[1.2.3.4]") == "1.2.3.4"); @@ -1035,6 +1043,14 @@ void init_random_seed(const char* read_file, const char* write_file) seeds_done = true; } +#ifdef HAVE_GETRANDOM + if ( ! 
seeds_done ) + { + ssize_t nbytes = getrandom(buf, sizeof(buf), 0); + seeds_done = nbytes == ssize_t(sizeof(buf)); + } +#endif + if ( ! seeds_done ) { // Gather up some entropy. From f12629644628108b2bf6d1907ae93f0ccb23f2c0 Mon Sep 17 00:00:00 2001 From: Johanna Amann Date: Tue, 28 Jan 2020 12:07:03 -0800 Subject: [PATCH 56/76] Update submodule [nomail] --- aux/netcontrol-connectors | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aux/netcontrol-connectors b/aux/netcontrol-connectors index 6bb6709e75..ba7b6c81a1 160000 --- a/aux/netcontrol-connectors +++ b/aux/netcontrol-connectors @@ -1 +1 @@ -Subproject commit 6bb6709e755ecd2b930ff4a3ddd68f16d2b52cba +Subproject commit ba7b6c81a17b578fd4723e903c0a8b112b43e1b1 From b9ed77420c3bdf12c33209a753ae029d63b96ad3 Mon Sep 17 00:00:00 2001 From: Jon Siwek Date: Tue, 28 Jan 2020 16:29:18 -0800 Subject: [PATCH 57/76] Update Cirrus CI config to use macOS Catalina --- .cirrus.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.cirrus.yml b/.cirrus.yml index b14fcd08c1..b6dd77e461 100644 --- a/.cirrus.yml +++ b/.cirrus.yml @@ -94,7 +94,7 @@ ubuntu16_task: # image is available. macos_task: osx_instance: - image: mojave-base + image: catalina-base prepare_script: ./ci/macos/prepare.sh << : *CI_TEMPLATE env: From d0b206fa36e4b88250eef9e1cbb5ed046b6bfe5b Mon Sep 17 00:00:00 2001 From: Robin Sommer Date: Mon, 27 Jan 2020 11:43:07 +0000 Subject: [PATCH 58/76] Remove APIs that were explicitly deprecated to be removed in 3.1. Special handling for bro_{init,done,script_loaded} events: if still used, they cause Zeek to abort at startup. 
--- aux/zeekctl | 2 +- scripts/base/utils/json.zeek | 2 - src/List.h | 26 +----- src/Val.cc | 29 +------ src/Val.h | 84 +------------------ src/analyzer/protocol/rfb/events.bif | 5 -- src/analyzer/protocol/rfb/rfb-analyzer.pac | 11 --- src/event.bif | 9 -- src/main.cc | 3 - src/parse.y | 14 ++-- src/strings.bif | 41 --------- src/zeek.bif | 20 ----- .../btest/Baseline/bifs.str_shell_escape/out | 4 - .../Baseline/coverage.bare-mode-errors/errors | 1 - .../coverage.init-default/missing_loads | 1 - testing/btest/Baseline/language.zeek_init/out | 12 +-- .../Baseline/language.zeek_script_loaded/out | 6 +- testing/btest/bifs/str_shell_escape.zeek | 15 ---- testing/btest/failed | 8 ++ testing/btest/language/zeek_init.zeek | 5 +- .../btest/language/zeek_script_loaded.zeek | 4 +- 21 files changed, 29 insertions(+), 273 deletions(-) delete mode 100644 scripts/base/utils/json.zeek delete mode 100644 testing/btest/Baseline/bifs.str_shell_escape/out delete mode 100644 testing/btest/bifs/str_shell_escape.zeek create mode 100644 testing/btest/failed diff --git a/aux/zeekctl b/aux/zeekctl index 6933b86e60..e166c066b6 160000 --- a/aux/zeekctl +++ b/aux/zeekctl @@ -1 +1 @@ -Subproject commit 6933b86e60f22f7a39ac1a8adbee4867902ce02e +Subproject commit e166c066b64b196a7dc5de285c68e0977344acc9 diff --git a/scripts/base/utils/json.zeek b/scripts/base/utils/json.zeek deleted file mode 100644 index a9662e74a2..0000000000 --- a/scripts/base/utils/json.zeek +++ /dev/null @@ -1,2 +0,0 @@ -## This file is deprecated in favor of to_json in zeek.bif -@deprecated="Remove in 3.1. to_json is now always available as a built-in function." 
diff --git a/src/List.h b/src/List.h index e0046bb55a..2b25dfb6f8 100644 --- a/src/List.h +++ b/src/List.h @@ -154,12 +154,6 @@ public: return max_entries; } - ZEEK_DEPRECATED("Remove in v3.1: Use std::sort instead") - void sort(list_cmp_func cmp_func) - { - qsort(entries, num_entries, sizeof(T), cmp_func); - } - int MemoryAllocation() const { return padded_sizeof(*this) + pad_size(max_entries * sizeof(T)); } @@ -174,7 +168,7 @@ public: ++num_entries; entries[0] = a; } - + void push_back(const T& a) { if ( num_entries == max_entries ) @@ -182,19 +176,13 @@ public: entries[num_entries++] = a; } - + void pop_front() { remove_nth(0); } void pop_back() { remove_nth(num_entries-1); } T& front() { return entries[0]; } T& back() { return entries[num_entries-1]; } - ZEEK_DEPRECATED("Remove in v3.1: Use push_front instead") - void insert(const T& a) // add at head of list - { - push_front(a); - } - // The append method is maintained for historical/compatibility reasons. // (It's commonly used in the event generation API) void append(const T& a) // add to end of list @@ -229,16 +217,6 @@ public: return old_ent; } - ZEEK_DEPRECATED("Remove in v3.1: Use back()/pop_back() instead") - T get() // return and remove ent at end of list - { - assert(num_entries > 0); - return entries[--num_entries]; - } - - ZEEK_DEPRECATED("Remove in v3.1: Use back() instead") - T& last() { return back(); } - // Return 0 if ent is not in the list, ent otherwise. 
bool is_member(const T& a) const { diff --git a/src/Val.cc b/src/Val.cc index fdaa719fab..2c690c590e 100644 --- a/src/Val.cc +++ b/src/Val.cc @@ -719,16 +719,6 @@ void IntervalVal::ValDescribe(ODesc* d) const } } -PortVal* PortManager::Get(uint32_t port_num) const - { - return val_mgr->GetPort(port_num); - } - -PortVal* PortManager::Get(uint32_t port_num, TransportProto port_type) const - { - return val_mgr->GetPort(port_num, port_type); - } - uint32_t PortVal::Mask(uint32_t port_num, TransportProto port_type) { // Note, for ICMP one-way connections: @@ -760,25 +750,8 @@ uint32_t PortVal::Mask(uint32_t port_num, TransportProto port_type) return port_num; } -PortVal::PortVal(uint32_t p, TransportProto port_type) : Val(TYPE_PORT) - { - auto port_num = PortVal::Mask(p, port_type); - val.uint_val = static_cast(port_num); - } - -PortVal::PortVal(uint32_t p, bool unused) : Val(TYPE_PORT) - { - val.uint_val = static_cast(p); - } - PortVal::PortVal(uint32_t p) : Val(TYPE_PORT) { - if ( p >= 65536 * NUM_PORT_SPACES ) - { - InternalWarning("bad port number"); - p = 0; - } - val.uint_val = static_cast(p); } @@ -3220,7 +3193,7 @@ ValManager::ValManager() auto port_type = (TransportProto)i; for ( auto j = 0u; j < arr.size(); ++j ) - arr[j] = new PortVal(PortVal::Mask(j, port_type), true); + arr[j] = new PortVal(PortVal::Mask(j, port_type)); } } diff --git a/src/Val.h b/src/Val.h index 50b3b7a20f..cda2dd43f8 100644 --- a/src/Val.h +++ b/src/Val.h @@ -85,56 +85,6 @@ typedef union { class Val : public BroObj { public: - ZEEK_DEPRECATED("Remove in v3.1: use val_mgr->GetBool, GetFalse/GetTrue, GetInt, or GetCount instead") - Val(bool b, TypeTag t) - { - val.int_val = b; - type = base_type(t); -#ifdef DEBUG - bound_id = 0; -#endif - } - - ZEEK_DEPRECATED("Remove in v3.1: use val_mgr->GetBool, GetFalse/GetTrue, GetInt, or GetCount instead") - Val(int32_t i, TypeTag t) - { - val.int_val = bro_int_t(i); - type = base_type(t); -#ifdef DEBUG - bound_id = 0; -#endif - } - - 
ZEEK_DEPRECATED("Remove in v3.1: use val_mgr->GetBool, GetFalse/GetTrue, GetInt, or GetCount instead") - Val(uint32_t u, TypeTag t) - { - val.uint_val = bro_uint_t(u); - type = base_type(t); -#ifdef DEBUG - bound_id = 0; -#endif - } - - ZEEK_DEPRECATED("Remove in v3.1: use val_mgr->GetBool, GetFalse/GetTrue, GetInt, or GetCount instead") - Val(int64_t i, TypeTag t) - { - val.int_val = i; - type = base_type(t); -#ifdef DEBUG - bound_id = 0; -#endif - } - - ZEEK_DEPRECATED("Remove in v3.1: use val_mgr->GetBool, GetFalse/GetTrue, GetInt, or GetCount instead") - Val(uint64_t u, TypeTag t) - { - val.uint_val = u; - type = base_type(t); -#ifdef DEBUG - bound_id = 0; -#endif - } - Val(double d, TypeTag t) { val.double_val = d; @@ -429,23 +379,6 @@ protected: }; -class PortManager { -public: - // Port number given in host order. - ZEEK_DEPRECATED("Remove in v3.1: use val_mgr->GetPort() instead") - PortVal* Get(uint32_t port_num, TransportProto port_type) const; - - // Host-order port number already masked with port space protocol mask. - ZEEK_DEPRECATED("Remove in v3.1: use val_mgr->GetPort() instead") - PortVal* Get(uint32_t port_num) const; - - // Returns a masked port number - ZEEK_DEPRECATED("Remove in v3.1: use PortVal::Mask() instead") - uint32_t Mask(uint32_t port_num, TransportProto port_type) const; -}; - -extern PortManager* port_mgr; - // Holds pre-allocated Val objects for those where it's more optimal to // re-use existing ones rather than allocate anew. class ValManager { @@ -521,14 +454,6 @@ protected: class PortVal : public Val { public: - // Port number given in host order. - ZEEK_DEPRECATED("Remove in v3.1: use val_mgr->GetPort() instead") - PortVal(uint32_t p, TransportProto port_type); - - // Host-order port number already masked with port space protocol mask. 
- ZEEK_DEPRECATED("Remove in v3.1: use val_mgr->GetPort() instead") - explicit PortVal(uint32_t p); - Val* SizeVal() const override { return val_mgr->GetInt(val.uint_val); } // Returns the port number in host order (not including the mask). @@ -559,7 +484,7 @@ protected: friend class Val; friend class ValManager; PortVal() {} - PortVal(uint32_t p, bool unused); + PortVal(uint32_t p); void ValDescribe(ODesc* d) const override; Val* DoClone(CloneState* state) override; @@ -1007,13 +932,6 @@ protected: class EnumVal : public Val { public: - - ZEEK_DEPRECATED("Remove in v3.1: use t->GetVal(i) instead") - EnumVal(int i, EnumType* t) : Val(t) - { - val.int_val = i; - } - Val* SizeVal() const override { return val_mgr->GetInt(val.int_val); } protected: diff --git a/src/analyzer/protocol/rfb/events.bif b/src/analyzer/protocol/rfb/events.bif index 44afb5b043..dd790d9a20 100644 --- a/src/analyzer/protocol/rfb/events.bif +++ b/src/analyzer/protocol/rfb/events.bif @@ -1,8 +1,3 @@ -## Generated for RFB event -## -## c: The connection record for the underlying transport-layer session/flow. -event rfb_event%(c: connection%) &deprecated="Remove in v3.1: This event never served a real purpose and will be removed. Please use the other rfb events instead."; - ## Generated for RFB event authentication mechanism selection ## ## c: The connection record for the underlying transport-layer session/flow. 
diff --git a/src/analyzer/protocol/rfb/rfb-analyzer.pac b/src/analyzer/protocol/rfb/rfb-analyzer.pac index ee0c4657bc..4568271453 100644 --- a/src/analyzer/protocol/rfb/rfb-analyzer.pac +++ b/src/analyzer/protocol/rfb/rfb-analyzer.pac @@ -1,11 +1,4 @@ refine flow RFB_Flow += { - function proc_rfb_message(msg: RFB_PDU): bool - %{ - if ( rfb_event ) - BifEvent::generate_rfb_event(connection()->bro_analyzer(), connection()->bro_analyzer()->Conn()); - return true; - %} - function proc_rfb_version(client: bool, major: bytestring, minor: bytestring) : bool %{ if (client) @@ -371,7 +364,3 @@ refine connection RFB_Conn += { return true; %} }; - -refine typeattr RFB_PDU += &let { - proc: bool = $context.flow.proc_rfb_message(this); -}; diff --git a/src/event.bif b/src/event.bif index 9fa5855359..d1ddf29d52 100644 --- a/src/event.bif +++ b/src/event.bif @@ -48,9 +48,6 @@ ## event zeek_init%(%); -## Deprecated synonym for :zeek:see:`zeek_init`. -event bro_init%(%) &deprecated="Remove in v3.1: use zeek_init"; - ## Generated at Zeek termination time. The event engine generates this event when ## Zeek is about to terminate, either due to having exhausted reading its input ## trace file(s), receiving a termination signal, or because Zeek was run without @@ -64,9 +61,6 @@ event bro_init%(%) &deprecated="Remove in v3.1: use zeek_init"; ## is not generated. event zeek_done%(%); -## Deprecated synonym for :zeek:see:`zeek_done`. -event bro_done%(%) &deprecated="Remove in v3.1: use zeek_done"; - ## Generated for every new connection. This event is raised with the first ## packet of a previously unknown connection. Zeek uses a flow-based definition ## of "connection" here that includes not only TCP sessions but also UDP and @@ -666,9 +660,6 @@ event reporter_error%(t: time, msg: string, location: string%) &error_handler; ## recursively for each ``@load``. event zeek_script_loaded%(path: string, level: count%); -## Deprecated synonym for :zeek:see:`zeek_script_loaded`. 
-event bro_script_loaded%(path: string, level: count%) &deprecated="Remove in v3.1: use zeek_script_loaded"; - ## Generated each time Zeek's script interpreter opens a file. This event is ## triggered only for files opened via :zeek:id:`open`, and in particular not for ## normal log files as created by log writers. diff --git a/src/main.cc b/src/main.cc index 45dbbec983..17211cd67c 100644 --- a/src/main.cc +++ b/src/main.cc @@ -84,7 +84,6 @@ int perftools_profile = 0; DNS_Mgr* dns_mgr; TimerMgr* timer_mgr; ValManager* val_mgr = 0; -PortManager* port_mgr = 0; logging::Manager* log_mgr = 0; threading::Manager* thread_mgr = 0; input::Manager* input_mgr = 0; @@ -364,7 +363,6 @@ void terminate_bro() delete reporter; delete plugin_mgr; delete val_mgr; - delete port_mgr; reporter = 0; } @@ -735,7 +733,6 @@ int main(int argc, char** argv) bro_start_time = current_time(true); val_mgr = new ValManager(); - port_mgr = new PortManager(); reporter = new Reporter(); thread_mgr = new threading::Manager(); plugin_mgr = new plugin::Manager(); diff --git a/src/parse.y b/src/parse.y index c409942cca..3f2e7fcc4b 100644 --- a/src/parse.y +++ b/src/parse.y @@ -1164,14 +1164,12 @@ func_hdr: } | TOK_EVENT event_id func_params opt_attr { - // Gracefully handle the deprecation of bro_init, bro_done, - // and bro_script_loaded - if ( streq("bro_init", $2->Name()) ) - $2 = global_scope()->Lookup("zeek_init"); - else if ( streq("bro_done", $2->Name()) ) - $2 = global_scope()->Lookup("zeek_done"); - else if ( streq("bro_script_loaded", $2->Name()) ) - $2 = global_scope()->Lookup("zeek_script_loaded"); + const char* name = $2->Name(); + if ( streq("bro_init", name) || streq("bro_done", name) || streq("bro_script_loaded", name) ) + { + auto base = std::string(name).substr(4); + reporter->Error(fmt("event %s() is no longer available, use zeek_%s() instead", name, base.c_str())); + } begin_func($2, current_module.c_str(), FUNC_FLAVOR_EVENT, 0, $3, $4); diff --git a/src/strings.bif b/src/strings.bif 
index 4f589bd078..ae1f8292d1 100644 --- a/src/strings.bif +++ b/src/strings.bif @@ -935,47 +935,6 @@ function safe_shell_quote%(source: string%): string return new StringVal(new BroString(1, dst, j)); %} -## Takes a string and escapes characters that would allow execution of -## commands at the shell level. Must be used before including strings in -## :zeek:id:`system` or similar calls. This function is deprecated, use -## :zeek:see:`safe_shell_quote` as a replacement. The difference is that -## :zeek:see:`safe_shell_quote` automatically returns a value that is -## wrapped in double-quotes, which is required to correctly and fully -## escape any characters that might be interpreted by the shell. -## -## source: The string to escape. -## -## Returns: A shell-escaped version of *source*. -## -## .. zeek:see:: system safe_shell_quote -function str_shell_escape%(source: string%): string &deprecated="Remove in v3.1: use safe_shell_quote" - %{ - unsigned j = 0; - const u_char* src = source->Bytes(); - unsigned n = source->Len(); - byte_vec dst = new u_char[n * 2 + 1]; - - for ( unsigned i = 0; i < n; ++i ) - { - switch ( src[i] ) { - case '`': case '"': case '\\': case '$': - - // case '|': case '&': case ';': case '(': case ')': case '<': - // case '>': case '\'': case '*': case '?': case '[': case ']': - // case '!': case '#': case '{': case '}': - dst[j++] = '\\'; - break; - default: - break; - } - - dst[j++] = src[i]; - } - - dst[j] = '\0'; - return new StringVal(new BroString(1, dst, j)); - %} - ## Finds all occurrences of a pattern in a string. ## ## str: The string to inspect. diff --git a/src/zeek.bif b/src/zeek.bif index 2f56ca52c0..c357c3ce5f 100644 --- a/src/zeek.bif +++ b/src/zeek.bif @@ -1820,15 +1820,6 @@ function getpid%(%) : count extern const char* zeek_version(); %%} -## Returns the Zeek version string. This function is deprecated, use -## :zeek:see:`zeek_version` instead. -## -## Returns: Zeek's version, e.g., 2.0-beta-47-debug. 
-function bro_version%(%): string &deprecated="Remove in v3.1: use zeek_version" - %{ - return new StringVal(zeek_version()); - %} - ## Returns the Zeek version string. ## ## Returns: Zeek's version, e.g., 2.0-beta-47-debug. @@ -2130,17 +2121,6 @@ function dump_rule_stats%(f: file%): bool return val_mgr->GetBool(1); %} -## Checks if Zeek is terminating. This function is deprecated, use -## :zeek:see:`zeek_is_terminating` instead. -## -## Returns: True if Zeek is in the process of shutting down. -## -## .. zeek:see:: terminate -function bro_is_terminating%(%): bool &deprecated="Remove in v3.1: use zeek_is_terminating" - %{ - return val_mgr->GetBool(terminating); - %} - ## Checks if Zeek is terminating. ## ## Returns: True if Zeek is in the process of shutting down. diff --git a/testing/btest/Baseline/bifs.str_shell_escape/out b/testing/btest/Baseline/bifs.str_shell_escape/out deleted file mode 100644 index 1845fefa37..0000000000 --- a/testing/btest/Baseline/bifs.str_shell_escape/out +++ /dev/null @@ -1,4 +0,0 @@ -24 -echo ${TEST} > "my file" -27 -echo \${TEST} > \"my file\" diff --git a/testing/btest/Baseline/coverage.bare-mode-errors/errors b/testing/btest/Baseline/coverage.bare-mode-errors/errors index c87b897c61..e69de29bb2 100644 --- a/testing/btest/Baseline/coverage.bare-mode-errors/errors +++ b/testing/btest/Baseline/coverage.bare-mode-errors/errors @@ -1 +0,0 @@ -warning in /Users/tim/Desktop/projects/zeek/testing/btest/../../scripts//base/utils/json.zeek, line 2: deprecated script loaded from command line arguments ="Remove in 3.1. to_json is now always available as a built-in function." 
diff --git a/testing/btest/Baseline/coverage.init-default/missing_loads b/testing/btest/Baseline/coverage.init-default/missing_loads index 87361a686a..893a603972 100644 --- a/testing/btest/Baseline/coverage.init-default/missing_loads +++ b/testing/btest/Baseline/coverage.init-default/missing_loads @@ -8,4 +8,3 @@ -./frameworks/openflow/cluster.zeek -./frameworks/packet-filter/cluster.zeek -./frameworks/sumstats/cluster.zeek --./utils/json.zeek diff --git a/testing/btest/Baseline/language.zeek_init/out b/testing/btest/Baseline/language.zeek_init/out index aa17ec8aa8..1a157c928a 100644 --- a/testing/btest/Baseline/language.zeek_init/out +++ b/testing/btest/Baseline/language.zeek_init/out @@ -1,8 +1,4 @@ -zeek_init at priority 10! -bro_init at priority 5! -zeek_init at priority 0! -bro_init at priority -10! -zeek_done at priority 10! -bro_done at priority 5! -zeek_done at priority 0! -bro_done at priority -10! +error in /home/robin/bro/master/testing/btest/.tmp/language.zeek_init/zeek_init.zeek, line 10: event bro_init() is no longer available, use zeek_init() instead +error in /home/robin/bro/master/testing/btest/.tmp/language.zeek_init/zeek_init.zeek, line 20: event bro_init() is no longer available, use zeek_init() instead +error in /home/robin/bro/master/testing/btest/.tmp/language.zeek_init/zeek_init.zeek, line 31: event bro_done() is no longer available, use zeek_done() instead +error in /home/robin/bro/master/testing/btest/.tmp/language.zeek_init/zeek_init.zeek, line 41: event bro_done() is no longer available, use zeek_done() instead diff --git a/testing/btest/Baseline/language.zeek_script_loaded/out b/testing/btest/Baseline/language.zeek_script_loaded/out index cddf509308..cc4b5291e3 100644 --- a/testing/btest/Baseline/language.zeek_script_loaded/out +++ b/testing/btest/Baseline/language.zeek_script_loaded/out @@ -1,4 +1,2 @@ -zeek_script_loaded priority 10 -bro_script_loaded priority 5 -zeek_script_loaded priority 0 -bro_script_loaded priority -10 +error in 
/home/robin/bro/master/testing/btest/.tmp/language.zeek_script_loaded/zeek_script_loaded.zeek, line 11: event bro_script_loaded() is no longer available, use zeek_script_loaded() instead +error in /home/robin/bro/master/testing/btest/.tmp/language.zeek_script_loaded/zeek_script_loaded.zeek, line 23: event bro_script_loaded() is no longer available, use zeek_script_loaded() instead diff --git a/testing/btest/bifs/str_shell_escape.zeek b/testing/btest/bifs/str_shell_escape.zeek deleted file mode 100644 index f3f08b0072..0000000000 --- a/testing/btest/bifs/str_shell_escape.zeek +++ /dev/null @@ -1,15 +0,0 @@ -# -# @TEST-EXEC: zeek -b %INPUT >out -# @TEST-EXEC: btest-diff out - -event zeek_init() - { - local a = "echo ${TEST} > \"my file\""; - - print |a|; - print a; - - local b = str_shell_escape(a); - print |b|; - print b; - } diff --git a/testing/btest/failed b/testing/btest/failed new file mode 100644 index 0000000000..a22b4ea1f0 --- /dev/null +++ b/testing/btest/failed @@ -0,0 +1,8 @@ +bifs.str_shell_escape +core.load-explicit-bro-suffix-fallback +coverage.init-default +language.zeek_init +language.zeek_script_loaded +plugins.legacy +coverage.sphinx-zeekygen-docs +coverage.bare-mode-errors diff --git a/testing/btest/language/zeek_init.zeek b/testing/btest/language/zeek_init.zeek index c1ca3ba65c..f8b63f8d51 100644 --- a/testing/btest/language/zeek_init.zeek +++ b/testing/btest/language/zeek_init.zeek @@ -1,6 +1,5 @@ -# @TEST-EXEC: zeek -b %INPUT >out -# @TEST-EXEC: btest-diff out - +# @TEST-EXEC-FAIL: zeek -b %INPUT >out 2>&1 +# @TEST-EXEC: TEST_DIFF_CANONIFIER=$SCRIPTS/diff-remove-abspath btest-diff out event zeek_init() &priority=10 { diff --git a/testing/btest/language/zeek_script_loaded.zeek b/testing/btest/language/zeek_script_loaded.zeek index 9011790e93..f8c112a0b5 100644 --- a/testing/btest/language/zeek_script_loaded.zeek +++ b/testing/btest/language/zeek_script_loaded.zeek @@ -1,5 +1,5 @@ -# @TEST-EXEC: zeek -b %INPUT >out -# @TEST-EXEC: btest-diff out 
+# @TEST-EXEC-FAIL: zeek -b %INPUT >out 2>&1 +# @TEST-EXEC: TEST_DIFF_CANONIFIER=$SCRIPTS/diff-remove-abspath btest-diff out event zeek_script_loaded(path: string, level: count) &priority=10 { From 0516f3f439decbe4c1b280cafa6001abddd4e3ac Mon Sep 17 00:00:00 2001 From: Robin Sommer Date: Mon, 27 Jan 2020 12:01:27 +0000 Subject: [PATCH 59/76] Change Bro wrapper script to now abort when old executable names are still used. --- zeek-wrapper.in | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/zeek-wrapper.in b/zeek-wrapper.in index 1372c573e7..249df3ad53 100755 --- a/zeek-wrapper.in +++ b/zeek-wrapper.in @@ -1,14 +1,13 @@ #! /usr/bin/env bash # -# Wrapper to continue supporting old names of executables. -# This will print a deprecation warning to stderr if (1) stdin/stdout/stderr -# are all connected to a tty, and (2) the environment variable ZEEK_IS_BRO -# is unset. +# Wrapper to continue reporting usage old names of executables. +# This will print an error warning to stderr if stdin/stdout/stderr +# are all connected to a tty. In any case, it will abort with error +# exit code. function deprecated { cat >&2 < Date: Mon, 27 Jan 2020 12:04:24 +0000 Subject: [PATCH 60/76] Remove bro-config.h.in and bro-path-dev.in. 
--- CMakeLists.txt | 3 --- bro-config.h.in | 5 ----- bro-path-dev.in | 1 - 3 files changed, 9 deletions(-) delete mode 100644 bro-config.h.in delete mode 120000 bro-path-dev.in diff --git a/CMakeLists.txt b/CMakeLists.txt index 956f2cd9b8..a919b8ffe1 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -379,11 +379,8 @@ endif () string(TOLOWER ${CMAKE_BUILD_TYPE} CMAKE_BUILD_TYPE_LOWER) configure_file(${CMAKE_CURRENT_SOURCE_DIR}/zeek-config.h.in ${CMAKE_CURRENT_BINARY_DIR}/zeek-config.h) -configure_file(${CMAKE_CURRENT_SOURCE_DIR}/bro-config.h.in - ${CMAKE_CURRENT_BINARY_DIR}/bro-config.h) include_directories(${CMAKE_CURRENT_BINARY_DIR}) install(FILES ${CMAKE_CURRENT_BINARY_DIR}/zeek-config.h DESTINATION include/zeek) -install(FILES ${CMAKE_CURRENT_BINARY_DIR}/bro-config.h DESTINATION include/zeek) if ( CAF_ROOT_DIR ) set(ZEEK_CONFIG_CAF_ROOT_DIR ${CAF_ROOT_DIR}) diff --git a/bro-config.h.in b/bro-config.h.in deleted file mode 100644 index 879249be38..0000000000 --- a/bro-config.h.in +++ /dev/null @@ -1,5 +0,0 @@ -#pragma once - -#pragma message("Warning: bro-config.h is deprecated, use zeek-config.h instead") - -#include "zeek-config.h" diff --git a/bro-path-dev.in b/bro-path-dev.in deleted file mode 120000 index 854029fbb8..0000000000 --- a/bro-path-dev.in +++ /dev/null @@ -1 +0,0 @@ -zeek-path-dev.in \ No newline at end of file From a941e3d94205dd8e39f01bab0e39e8591f5589bb Mon Sep 17 00:00:00 2001 From: Robin Sommer Date: Mon, 27 Jan 2020 12:06:43 +0000 Subject: [PATCH 61/76] Remove old_comm_usage_is_ok. That was just a left-over without any functionality. --- scripts/base/init-bare.zeek | 7 ------- 1 file changed, 7 deletions(-) diff --git a/scripts/base/init-bare.zeek b/scripts/base/init-bare.zeek index 5a6f0a47ae..9a908b651e 100644 --- a/scripts/base/init-bare.zeek +++ b/scripts/base/init-bare.zeek @@ -5228,10 +5228,3 @@ const global_hash_seed: string = "" &redef; ## files. The larger the value, the more confidence in UID uniqueness. 
## The maximum is currently 128 bits. const bits_per_uid: count = 96 &redef; - -## Whether usage of the old communication system is considered an error or -## not. The default Zeek configuration no longer works with the non-Broker -## communication system unless you have manually taken action to initialize -## and set up the old comm. system. Deprecation warnings are still emitted -## when setting this flag, but they will not result in a fatal error. -const old_comm_usage_is_ok: bool = F &redef; From ad8e659943dc9af1a127b4c13ea2117a42d7ee88 Mon Sep 17 00:00:00 2001 From: Robin Sommer Date: Mon, 27 Jan 2020 12:08:00 +0000 Subject: [PATCH 62/76] Update notice user agent. --- scripts/base/frameworks/notice/main.zeek | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/base/frameworks/notice/main.zeek b/scripts/base/frameworks/notice/main.zeek index 9e43a9ed50..f3bc6bc8df 100644 --- a/scripts/base/frameworks/notice/main.zeek +++ b/scripts/base/frameworks/notice/main.zeek @@ -402,7 +402,7 @@ function email_headers(subject_desc: string, dest: string): string "From: ", mail_from, "\n", "Subject: ", mail_subject_prefix, " ", subject_desc, "\n", "To: ", dest, "\n", - "User-Agent: Bro-IDS/", zeek_version(), "\n"); + "User-Agent: Zeek/", zeek_version(), "\n"); if ( reply_to != "" ) header_text = string_cat(header_text, "Reply-To: ", reply_to, "\n"); return header_text; From 123cbba6be924720bd366de87b5a56bd09c7157e Mon Sep 17 00:00:00 2001 From: Robin Sommer Date: Mon, 27 Jan 2020 12:08:43 +0000 Subject: [PATCH 63/76] No longer symlink local.zeek to local.bro. 
--- scripts/CMakeLists.txt | 20 -------------------- 1 file changed, 20 deletions(-) diff --git a/scripts/CMakeLists.txt b/scripts/CMakeLists.txt index e5611aefaa..96b2c897c1 100644 --- a/scripts/CMakeLists.txt +++ b/scripts/CMakeLists.txt @@ -7,26 +7,6 @@ install(DIRECTORY ./ DESTINATION ${ZEEK_SCRIPT_INSTALL_PATH} FILES_MATCHING PATTERN "*.fp" ) -if ( NOT BINARY_PACKAGING_MODE ) - # If the user has a local.bro file from a previous installation, prefer to - # symlink local.zeek to it to avoid breaking their custom configuration -- - # because ZeekControl will now prefer to load local.zeek rather than local.bro - # and we're about to install a default version of local.zeek. - - set(_local_bro_dst ${ZEEK_SCRIPT_INSTALL_PATH}/site/local.bro) - set(_local_zeek_dst ${ZEEK_SCRIPT_INSTALL_PATH}/site/local.zeek) - - install(CODE " - if ( \"\$ENV{DESTDIR}\" STREQUAL \"\" ) - if ( EXISTS \"${_local_bro_dst}\" AND NOT EXISTS \"${_local_zeek_dst}\" ) - message(STATUS \"WARNING: installed ${_local_zeek_dst} as symlink to ${_local_bro_dst}\") - execute_process(COMMAND \"${CMAKE_COMMAND}\" -E create_symlink - \"${_local_bro_dst}\" \"${_local_zeek_dst}\") - endif () - endif () - ") -endif () - # Install local script as a config file since it's meant to be modified directly. InstallPackageConfigFile( ${CMAKE_CURRENT_SOURCE_DIR}/site/local.zeek From bbc308cb021d70adcf6457314d363311a656a740 Mon Sep 17 00:00:00 2001 From: Robin Sommer Date: Mon, 27 Jan 2020 14:00:45 +0000 Subject: [PATCH 64/76] Fix missing rename. --- src/DNS_Mgr.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/DNS_Mgr.cc b/src/DNS_Mgr.cc index 0b50700bd1..5599074b0d 100644 --- a/src/DNS_Mgr.cc +++ b/src/DNS_Mgr.cc @@ -467,7 +467,7 @@ void DNS_Mgr::InitPostScript() const char* cache_dir = dir ? 
dir : "."; cache_name = new char[strlen(cache_dir) + 64]; - sprintf(cache_name, "%s/%s", cache_dir, ".bro-dns-cache"); + sprintf(cache_name, "%s/%s", cache_dir, ".zeek-dns-cache"); LoadCache(fopen(cache_name, "r")); } From 649301b667e9027f6cd716c60a23880cacba07e2 Mon Sep 17 00:00:00 2001 From: Robin Sommer Date: Mon, 27 Jan 2020 14:17:41 +0000 Subject: [PATCH 65/76] Add warnings when loading scripts ending in ".bro", or using legacy environment variables. --- src/plugin/Manager.cc | 3 + src/util.cc | 70 ++++++++++++++----- src/util.h | 3 + .../out | 2 + .../load-explicit-bro-suffix-fallback.zeek | 4 +- 5 files changed, 64 insertions(+), 18 deletions(-) diff --git a/src/plugin/Manager.cc b/src/plugin/Manager.cc index 58dc52e033..51b85bdbb5 100644 --- a/src/plugin/Manager.cc +++ b/src/plugin/Manager.cc @@ -189,6 +189,7 @@ bool Manager::ActivateDynamicPluginInternal(const std::string& name, bool ok_if_ if ( is_file(init) ) { DBG_LOG(DBG_PLUGINS, " Loading %s", init.c_str()); + warn_if_legacy_script(init); scripts_to_load.push_back(init); break; } @@ -202,6 +203,7 @@ bool Manager::ActivateDynamicPluginInternal(const std::string& name, bool ok_if_ if ( is_file(init) ) { DBG_LOG(DBG_PLUGINS, " Loading %s", init.c_str()); + warn_if_legacy_script(init); scripts_to_load.push_back(init); break; } @@ -214,6 +216,7 @@ bool Manager::ActivateDynamicPluginInternal(const std::string& name, bool ok_if_ if ( is_file(init) ) { DBG_LOG(DBG_PLUGINS, " Loading %s", init.c_str()); + warn_if_legacy_script(init); scripts_to_load.push_back(init); break; } diff --git a/src/util.cc b/src/util.cc index 519c7ba35d..e52f365d1c 100644 --- a/src/util.cc +++ b/src/util.cc @@ -62,6 +62,36 @@ #endif #endif +static bool starts_with(const std::string_view& s, const std::string& beginning) + { + if ( beginning.size() > s.size() ) + return false; + + return std::equal(beginning.begin(), beginning.end(), s.begin()); + } + +TEST_CASE("util starts_with") + { + CHECK(starts_with("abcde", "ab") == true); + 
CHECK(starts_with("abcde", "de") == false); + CHECK(starts_with("abcde", "abcedf") == false); + } + +static bool ends_with(const std::string_view& s, const std::string& ending) + { + if ( ending.size() > s.size() ) + return false; + + return std::equal(ending.rbegin(), ending.rend(), s.rbegin()); + } + +TEST_CASE("util ends_with") + { + CHECK(ends_with("abcde", "de") == true); + CHECK(ends_with("abcde", "fg") == false); + CHECK(ends_with("abcde", "abcedf") == false); + } + TEST_CASE("util extract_ip") { CHECK(extract_ip("[1.2.3.4]") == "1.2.3.4"); @@ -1249,6 +1279,15 @@ TEST_CASE("util is_package_loader") const array script_extensions = {".zeek", ".bro"}; +void warn_if_legacy_script(const std::string_view& filename) + { + if ( ends_with(filename, ".bro") ) + { + std::string x(filename); + reporter->Warning("Loading script '%s' with legacy extension, support for '.bro' will be removed in Zeek v4.1", x.c_str()); + } + } + bool is_package_loader(const string& path) { string filename(std::move(SafeBasename(path).result)); @@ -1256,7 +1295,10 @@ bool is_package_loader(const string& path) for ( const string& ext : script_extensions ) { if ( filename == "__load__" + ext ) + { + warn_if_legacy_script(filename); return true; + } } return false; @@ -1294,6 +1336,7 @@ FILE* open_package(string& path, const string& mode) string p = path + ext; if ( can_read(p) ) { + warn_if_legacy_script(path); path.append(ext); return open_file(path, mode); } @@ -1604,21 +1647,6 @@ string find_file(const string& filename, const string& path_set, return string(); } -static bool ends_with(const std::string& s, const std::string& ending) - { - if ( ending.size() > s.size() ) - return false; - - return std::equal(ending.rbegin(), ending.rend(), s.rbegin()); - } - -TEST_CASE("util ends_with") - { - CHECK(ends_with("abcde", "de") == true); - CHECK(ends_with("abcde", "fg") == false); - CHECK(ends_with("abcde", "abcedf") == false); - } - string find_script_file(const string& filename, const string& 
path_set) { vector paths; @@ -1631,11 +1659,16 @@ string find_script_file(const string& filename, const string& path_set) string f = find_file_in_path(filename, paths[n], ext); if ( ! f.empty() ) + { + warn_if_legacy_script(f); return f; + } } if ( ends_with(filename, ".bro") ) { + warn_if_legacy_script(filename); + // We were looking for a file explicitly ending in .bro and didn't // find it, so fall back to one ending in .zeek, if it exists. auto fallback = string(filename.data(), filename.size() - 4) + ".zeek"; @@ -2186,7 +2219,12 @@ char* zeekenv(const char* name) if ( it == legacy_vars.end() ) return rval; - return getenv(it->second); + auto val = getenv(it->second); + + if ( val && starts_with(it->second, "BRO_") ) + reporter->Warning("Using legacy environment variable %s, support will be removed in Zeek v4.1; use %s instead", it->second, name); + + return val; } static string json_escape_byte(char c) diff --git a/src/util.h b/src/util.h index 5a5a8c5159..51cf00242f 100644 --- a/src/util.h +++ b/src/util.h @@ -265,6 +265,9 @@ extern std::string bro_prefixes(); extern const std::array script_extensions; +/** Prints a warning if the filename ends in .bro. 
*/ +void warn_if_legacy_script(const std::string_view& filename); + bool is_package_loader(const std::string& path); extern void add_to_bro_path(const std::string& dir); diff --git a/testing/btest/Baseline/core.load-explicit-bro-suffix-fallback/out b/testing/btest/Baseline/core.load-explicit-bro-suffix-fallback/out index c67eefbfc1..a0a25698a1 100644 --- a/testing/btest/Baseline/core.load-explicit-bro-suffix-fallback/out +++ b/testing/btest/Baseline/core.load-explicit-bro-suffix-fallback/out @@ -1 +1,3 @@ +warning in /home/robin/bro/master/testing/btest/.tmp/core.load-explicit-bro-suffix-fallback/load-explicit-bro-suffix-fallback.zeek, line 5: Loading script 'foo.bro' with legacy extension, support for '.bro' will be removed in Zeek v4.1 +warning in /home/robin/bro/master/testing/btest/.tmp/core.load-explicit-bro-suffix-fallback/load-explicit-bro-suffix-fallback.zeek, line 5: Loading script 'foo.bro' with legacy extension, support for '.bro' will be removed in Zeek v4.1 loaded foo.zeek diff --git a/testing/btest/core/load-explicit-bro-suffix-fallback.zeek b/testing/btest/core/load-explicit-bro-suffix-fallback.zeek index d2ce412209..4cf890cb09 100644 --- a/testing/btest/core/load-explicit-bro-suffix-fallback.zeek +++ b/testing/btest/core/load-explicit-bro-suffix-fallback.zeek @@ -1,5 +1,5 @@ -# @TEST-EXEC: zeek -b %INPUT >out -# @TEST-EXEC: btest-diff out +# @TEST-EXEC: zeek -b %INPUT >out 2>&1 +# @TEST-EXEC: TEST_DIFF_CANONIFIER=$SCRIPTS/diff-remove-abspath btest-diff out # We don't have a foo.bro, but we'll accept foo.zeek. @load foo.bro From e552a07249c12709b7243ff74259d8f81acd6656 Mon Sep 17 00:00:00 2001 From: Robin Sommer Date: Mon, 27 Jan 2020 14:42:34 +0000 Subject: [PATCH 66/76] Remove legacy symlinks in aux/. That's aux/bro-aux and aux/broctl.
--- aux/bro-aux | 1 - aux/broctl | 1 - 2 files changed, 2 deletions(-) delete mode 120000 aux/bro-aux delete mode 120000 aux/broctl diff --git a/aux/bro-aux b/aux/bro-aux deleted file mode 120000 index b28967a8e2..0000000000 --- a/aux/bro-aux +++ /dev/null @@ -1 +0,0 @@ -zeek-aux \ No newline at end of file diff --git a/aux/broctl b/aux/broctl deleted file mode 120000 index d17a55b030..0000000000 --- a/aux/broctl +++ /dev/null @@ -1 +0,0 @@ -zeekctl \ No newline at end of file From 4eeabd521064adea6585775bae83e01c7a241bb4 Mon Sep 17 00:00:00 2001 From: Robin Sommer Date: Mon, 27 Jan 2020 15:40:52 +0000 Subject: [PATCH 67/76] Remove test for legacy plugin. We no longer support creating new plugins using the old naming scheme. --- testing/btest/Baseline/plugins.legacy/output | 6 -- .../btest/plugins/legacy-plugin/.btest-ignore | 0 .../plugins/legacy-plugin/CMakeLists.txt | 19 ------ .../scripts/Demo/Foo/base/main.zeek | 7 --- .../legacy-plugin/scripts/__load__.zeek | 1 - .../btest/plugins/legacy-plugin/src/Foo.cc | 59 ------------------- testing/btest/plugins/legacy-plugin/src/Foo.h | 29 --------- .../btest/plugins/legacy-plugin/src/Plugin.cc | 21 ------- .../plugins/legacy-plugin/src/events.bif | 2 - .../legacy-plugin/src/foo-analyzer.pac | 15 ----- .../legacy-plugin/src/foo-protocol.pac | 4 -- .../btest/plugins/legacy-plugin/src/foo.pac | 26 -------- testing/btest/plugins/legacy.zeek | 14 ----- 13 files changed, 203 deletions(-) delete mode 100644 testing/btest/Baseline/plugins.legacy/output delete mode 100644 testing/btest/plugins/legacy-plugin/.btest-ignore delete mode 100644 testing/btest/plugins/legacy-plugin/CMakeLists.txt delete mode 100644 testing/btest/plugins/legacy-plugin/scripts/Demo/Foo/base/main.zeek delete mode 100644 testing/btest/plugins/legacy-plugin/scripts/__load__.zeek delete mode 100644 testing/btest/plugins/legacy-plugin/src/Foo.cc delete mode 100644 testing/btest/plugins/legacy-plugin/src/Foo.h delete mode 100644 
testing/btest/plugins/legacy-plugin/src/Plugin.cc delete mode 100644 testing/btest/plugins/legacy-plugin/src/events.bif delete mode 100644 testing/btest/plugins/legacy-plugin/src/foo-analyzer.pac delete mode 100644 testing/btest/plugins/legacy-plugin/src/foo-protocol.pac delete mode 100644 testing/btest/plugins/legacy-plugin/src/foo.pac delete mode 100644 testing/btest/plugins/legacy.zeek diff --git a/testing/btest/Baseline/plugins.legacy/output b/testing/btest/Baseline/plugins.legacy/output deleted file mode 100644 index 675a884b16..0000000000 --- a/testing/btest/Baseline/plugins.legacy/output +++ /dev/null @@ -1,6 +0,0 @@ -Demo::Foo - A Foo test analyzer (dynamic, version 1.0.0) - [Analyzer] Foo (ANALYZER_FOO, enabled) - [Event] foo_message - -=== -foo_message, [orig_h=::1, orig_p=37927/tcp, resp_h=::1, resp_p=4242/tcp], Hello, Foo!\x0a diff --git a/testing/btest/plugins/legacy-plugin/.btest-ignore b/testing/btest/plugins/legacy-plugin/.btest-ignore deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/testing/btest/plugins/legacy-plugin/CMakeLists.txt b/testing/btest/plugins/legacy-plugin/CMakeLists.txt deleted file mode 100644 index 92e1a90e9d..0000000000 --- a/testing/btest/plugins/legacy-plugin/CMakeLists.txt +++ /dev/null @@ -1,19 +0,0 @@ - -project(Zeek-Plugin-Demo-Foo) - -cmake_minimum_required(VERSION 2.6.3) - -if ( NOT BRO_DIST ) - message(FATAL_ERROR "BRO_DIST not set") -endif () - -set(CMAKE_MODULE_PATH ${BRO_DIST}/cmake) - -include(BroPlugin) - -bro_plugin_begin(Demo Foo) -bro_plugin_cc(src/Plugin.cc) -bro_plugin_cc(src/Foo.cc) -bro_plugin_bif(src/events.bif) -bro_plugin_pac(src/foo.pac src/foo-protocol.pac src/foo-analyzer.pac) -bro_plugin_end() diff --git a/testing/btest/plugins/legacy-plugin/scripts/Demo/Foo/base/main.zeek b/testing/btest/plugins/legacy-plugin/scripts/Demo/Foo/base/main.zeek deleted file mode 100644 index 76c63723b7..0000000000 --- a/testing/btest/plugins/legacy-plugin/scripts/Demo/Foo/base/main.zeek +++ /dev/null @@ 
-1,7 +0,0 @@ - -const ports = { 4242/tcp }; - -event zeek_init() &priority=5 - { - Analyzer::register_for_ports(Analyzer::ANALYZER_FOO, ports); - } diff --git a/testing/btest/plugins/legacy-plugin/scripts/__load__.zeek b/testing/btest/plugins/legacy-plugin/scripts/__load__.zeek deleted file mode 100644 index 330718c604..0000000000 --- a/testing/btest/plugins/legacy-plugin/scripts/__load__.zeek +++ /dev/null @@ -1 +0,0 @@ -@load Demo/Foo/base/main diff --git a/testing/btest/plugins/legacy-plugin/src/Foo.cc b/testing/btest/plugins/legacy-plugin/src/Foo.cc deleted file mode 100644 index be3c52a98b..0000000000 --- a/testing/btest/plugins/legacy-plugin/src/Foo.cc +++ /dev/null @@ -1,59 +0,0 @@ - -#include "Foo.h" -#include "foo_pac.h" -#include "events.bif.h" - -#include - -using namespace plugin::Demo_Foo; - -Foo::Foo(Connection* conn) - : analyzer::tcp::TCP_ApplicationAnalyzer("Foo", conn) - { - interp = new binpac::Foo::Foo_Conn(this); - } - -Foo::~Foo() - { - delete interp; - } - -void Foo::Done() - { - analyzer::tcp::TCP_ApplicationAnalyzer::Done(); - - interp->FlowEOF(true); - interp->FlowEOF(false); - } - -void Foo::EndpointEOF(bool is_orig) - { - analyzer::tcp::TCP_ApplicationAnalyzer::EndpointEOF(is_orig); - interp->FlowEOF(is_orig); - } - -void Foo::DeliverStream(int len, const u_char* data, bool orig) - { - analyzer::tcp::TCP_ApplicationAnalyzer::DeliverStream(len, data, orig); - - assert(TCP()); - - if ( TCP()->IsPartial() ) - // punt on partial. 
- return; - - try - { - interp->NewData(orig, data, data + len); - } - catch ( const binpac::Exception& e ) - { - ProtocolViolation(fmt("Binpac exception: %s", e.c_msg())); - } - } - -void Foo::Undelivered(uint64 seq, int len, bool orig) - { - analyzer::tcp::TCP_ApplicationAnalyzer::Undelivered(seq, len, orig); - interp->NewGap(orig, len); - } diff --git a/testing/btest/plugins/legacy-plugin/src/Foo.h b/testing/btest/plugins/legacy-plugin/src/Foo.h deleted file mode 100644 index f68aa3814d..0000000000 --- a/testing/btest/plugins/legacy-plugin/src/Foo.h +++ /dev/null @@ -1,29 +0,0 @@ - -#pragma once - -#include "analyzer/protocol/tcp/TCP.h" -#include "analyzer/protocol/pia/PIA.h" - -namespace binpac { namespace Foo { class Foo_Conn; } } - -namespace plugin { -namespace Demo_Foo { - -class Foo : public analyzer::tcp::TCP_ApplicationAnalyzer { -public: - Foo(Connection* conn); - ~Foo(); - - virtual void Done(); - virtual void DeliverStream(int len, const u_char* data, bool orig); - virtual void Undelivered(uint64 seq, int len, bool orig); - virtual void EndpointEOF(bool is_orig); - - static analyzer::Analyzer* Instantiate(Connection* conn) - { return new Foo(conn); } - -protected: - binpac::Foo::Foo_Conn* interp; -}; - -} } diff --git a/testing/btest/plugins/legacy-plugin/src/Plugin.cc b/testing/btest/plugins/legacy-plugin/src/Plugin.cc deleted file mode 100644 index bd2662d67c..0000000000 --- a/testing/btest/plugins/legacy-plugin/src/Plugin.cc +++ /dev/null @@ -1,21 +0,0 @@ - -#include "Plugin.h" - -#include "Foo.h" - -namespace plugin { namespace Demo_Foo { Plugin plugin; } } - -using namespace plugin::Demo_Foo; - -plugin::Configuration Plugin::Configure() - { - AddComponent(new ::analyzer::Component("Foo", plugin::Demo_Foo::Foo::Instantiate)); - - plugin::Configuration config; - config.name = "Demo::Foo"; - config.description = "A Foo test analyzer"; - config.version.major = 1; - config.version.minor = 0; - config.version.patch = 0; - return config; - } diff --git 
a/testing/btest/plugins/legacy-plugin/src/events.bif b/testing/btest/plugins/legacy-plugin/src/events.bif deleted file mode 100644 index 4603fe4cf6..0000000000 --- a/testing/btest/plugins/legacy-plugin/src/events.bif +++ /dev/null @@ -1,2 +0,0 @@ - -event foo_message%(c: connection, data: string%); diff --git a/testing/btest/plugins/legacy-plugin/src/foo-analyzer.pac b/testing/btest/plugins/legacy-plugin/src/foo-analyzer.pac deleted file mode 100644 index a210a8430c..0000000000 --- a/testing/btest/plugins/legacy-plugin/src/foo-analyzer.pac +++ /dev/null @@ -1,15 +0,0 @@ - -refine connection Foo_Conn += { - - function Foo_data(msg: Foo_Message): bool - %{ - StringVal* data = new StringVal(${msg.data}.length(), (const char*) ${msg.data}.data()); - BifEvent::generate_foo_message(bro_analyzer(), bro_analyzer()->Conn(), data); - return true; - %} - -}; - -refine typeattr Foo_Message += &let { - proc: bool = $context.connection.Foo_data(this); -}; diff --git a/testing/btest/plugins/legacy-plugin/src/foo-protocol.pac b/testing/btest/plugins/legacy-plugin/src/foo-protocol.pac deleted file mode 100644 index 892513c4f0..0000000000 --- a/testing/btest/plugins/legacy-plugin/src/foo-protocol.pac +++ /dev/null @@ -1,4 +0,0 @@ - -type Foo_Message(is_orig: bool) = record { - data: bytestring &restofdata; -}; diff --git a/testing/btest/plugins/legacy-plugin/src/foo.pac b/testing/btest/plugins/legacy-plugin/src/foo.pac deleted file mode 100644 index 826bcc624e..0000000000 --- a/testing/btest/plugins/legacy-plugin/src/foo.pac +++ /dev/null @@ -1,26 +0,0 @@ -%include binpac.pac -%include bro.pac - -%extern{ -#include "Foo.h" - -#include "events.bif.h" -%} - -analyzer Foo withcontext { - connection: Foo_Conn; - flow: Foo_Flow; -}; - -connection Foo_Conn(bro_analyzer: BroAnalyzer) { - upflow = Foo_Flow(true); - downflow = Foo_Flow(false); -}; - -%include foo-protocol.pac - -flow Foo_Flow(is_orig: bool) { - datagram = Foo_Message(is_orig) withcontext(connection, this); -}; - -%include 
foo-analyzer.pac diff --git a/testing/btest/plugins/legacy.zeek b/testing/btest/plugins/legacy.zeek deleted file mode 100644 index bb663d744b..0000000000 --- a/testing/btest/plugins/legacy.zeek +++ /dev/null @@ -1,14 +0,0 @@ -# Test that legacy Bro plugins still work. -# @TEST-EXEC: ${DIST}/aux/zeek-aux/plugin-support/init-plugin -u . Demo Foo -# @TEST-EXEC: cp -r %DIR/legacy-plugin/* . -# @TEST-EXEC: ./configure --bro-dist=${DIST} && make -# @TEST-EXEC: unset ZEEK_PLUGIN_PATH; BRO_PLUGIN_PATH=`pwd` zeek -NN Demo::Foo >>output -# @TEST-EXEC: echo === >>output -# @TEST-EXEC: unset ZEEK_PLUGIN_PATH; BRO_PLUGIN_PATH=`pwd` zeek -r $TRACES/port4242.trace %INPUT >>output -# @TEST-EXEC: TEST_DIFF_CANONIFIER= btest-diff output - -event foo_message(c: connection, data: string) - { - print "foo_message", c$id, data; - } - From e1458360e3ea36b2f17ac6dc9c69e34de7a160b9 Mon Sep 17 00:00:00 2001 From: Robin Sommer Date: Mon, 27 Jan 2020 14:43:05 +0000 Subject: [PATCH 68/76] Small cleanup and updating submodules. --- aux/zeek-aux | 2 +- aux/zeekctl | 2 +- doc | 2 +- zeek-wrapper.in | 4 ++-- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/aux/zeek-aux b/aux/zeek-aux index 0790f42014..cd0daf5fda 160000 --- a/aux/zeek-aux +++ b/aux/zeek-aux @@ -1 +1 @@ -Subproject commit 0790f420148806c1380fc7e0e0a4278c7970753c +Subproject commit cd0daf5fda4b1c0848ccc0025d67205b0e006da7 diff --git a/aux/zeekctl b/aux/zeekctl index e166c066b6..172ad814e2 160000 --- a/aux/zeekctl +++ b/aux/zeekctl @@ -1 +1 @@ -Subproject commit e166c066b64b196a7dc5de285c68e0977344acc9 +Subproject commit 172ad814e2404a36b6efdd603619ff80f6a9fbca diff --git a/doc b/doc index 7192dbedf3..799ed621e3 160000 --- a/doc +++ b/doc @@ -1 +1 @@ -Subproject commit 7192dbedf3ca9ce49294057262074f0e888177f3 +Subproject commit 799ed621e33a5d3e3ba0288241989bfcfbd17f95 diff --git a/zeek-wrapper.in b/zeek-wrapper.in index 249df3ad53..3c4123b27a 100755 --- a/zeek-wrapper.in +++ b/zeek-wrapper.in @@ -1,8 +1,8 @@ #! 
/usr/bin/env bash # # Wrapper to continue reporting usage old names of executables. -# This will print an error warning to stderr if stdin/stdout/stderr -# are all connected to a tty. In any case, it will abort with error +# This will print an error to stderr if stdin/stdout/stderr +# are all connected to a tty. It will then abort with an error # exit code. function deprecated { From 42a7629695caf7ef9b09ebeedee7bee97e64e7de Mon Sep 17 00:00:00 2001 From: Robin Sommer Date: Wed, 29 Jan 2020 12:14:55 +0000 Subject: [PATCH 69/76] Update NEWS for naming changes. --- NEWS | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/NEWS b/NEWS index 208fb89d5b..8ffa29783e 100644 --- a/NEWS +++ b/NEWS @@ -29,6 +29,15 @@ New Functionality Changed Functionality --------------------- +- The naming-related backwards-compatibility wrappers & work-arounds + introduced in 3.0 have either changed their operation, or in some + cases been removed. Generally, anything that reported a + naming-related warning in 3.0 now aborts with a corresponding error + message. In cases where 3.0 silently continued to accept old names, + 3.1 now reports warnings. Most importantly, that's loading of + scripts with ".bro" endings, which are now flagged and should be + renamed. + - The key type of ``Known::service_store`` has changed to ``Known::AddrPortServTriplet`` and ``Known::services`` is now a table instead of just a set.
From 2694b4e2c82e1c2e0995a4a4ca84056c505f5871 Mon Sep 17 00:00:00 2001 From: Max Kellermann Date: Wed, 22 Jan 2020 17:40:30 +0100 Subject: [PATCH 70/76] Net: remove unused variable --- src/Net.cc | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/Net.cc b/src/Net.cc index 4ef0a0a54a..ac38eb846d 100644 --- a/src/Net.cc +++ b/src/Net.cc @@ -455,8 +455,6 @@ void net_delete() int _processing_suspended = 0; -static double suspend_start = 0; - void net_suspend_processing() { if ( _processing_suspended == 0 ) From aacf84e552fd404c63ecea345a6e44953446ee51 Mon Sep 17 00:00:00 2001 From: Max Kellermann Date: Wed, 22 Jan 2020 13:50:05 +0100 Subject: [PATCH 71/76] Type, util: add `constexpr` to static variables This allows the compiler to move them to section `.rodata`. --- src/Type.cc | 2 +- src/util.cc | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Type.cc b/src/Type.cc index 43f5da6267..6a4139bc8b 100644 --- a/src/Type.cc +++ b/src/Type.cc @@ -19,7 +19,7 @@ BroType::TypeAliasMap BroType::type_aliases; // Note: This function must be thread-safe. 
const char* type_name(TypeTag t) { - static const char* type_names[int(NUM_TYPES)] = { + static constexpr const char* type_names[int(NUM_TYPES)] = { "void", // 0 "bool", // 1 "int", // 2 diff --git a/src/util.cc b/src/util.cc index 18999aa903..cbd635d5a2 100644 --- a/src/util.cc +++ b/src/util.cc @@ -612,7 +612,7 @@ TEST_CASE("util uitoa_n") char* uitoa_n(uint64_t value, char* str, int n, int base, const char* prefix) { - static char dig[] = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"; + static constexpr char dig[] = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"; assert(n); From 32bb019e3aa3d3bdd607e322fee2d32ed499ffcf Mon Sep 17 00:00:00 2001 From: Max Kellermann Date: Wed, 22 Jan 2020 13:50:17 +0100 Subject: [PATCH 72/76] util, nb_dns: fix off-by-one bugs in strncpy() calls Fortunately, these bugs had no effect because the following lines overwrote the last character with a null byte. --- src/nb_dns.c | 2 +- src/util.cc | 2 +- src/util.h | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/nb_dns.c b/src/nb_dns.c index f8d939b4ab..106740bb61 100644 --- a/src/nb_dns.c +++ b/src/nb_dns.c @@ -411,7 +411,7 @@ _nb_dns_mkquery(register struct nb_dns_info *nd, register const char *name, return (-1); } memset(ne, 0, sizeof(*ne)); - strncpy(ne->name, name, sizeof(ne->name)); + strncpy(ne->name, name, sizeof(ne->name) - 1); ne->name[sizeof(ne->name) - 1] = '\0'; ne->qtype = qtype; ne->atype = atype; diff --git a/src/util.cc b/src/util.cc index cbd635d5a2..da056aba4f 100644 --- a/src/util.cc +++ b/src/util.cc @@ -623,7 +623,7 @@ char* uitoa_n(uint64_t value, char* str, int n, int base, const char* prefix) if ( prefix ) { - strncpy(str, prefix, n); + strncpy(str, prefix, n-1); str[n-1] = '\0'; i += strlen(prefix); } diff --git a/src/util.h b/src/util.h index d5b147abe6..e3c8f9c616 100644 --- a/src/util.h +++ b/src/util.h @@ -512,7 +512,7 @@ inline void* safe_malloc(size_t size) inline char* safe_strncpy(char* dest, 
const char* src, size_t n) { - char* result = strncpy(dest, src, n); + char* result = strncpy(dest, src, n-1); dest[n-1] = '\0'; return result; } From a4588272921ce9c3266d66841cbf51f145ed4826 Mon Sep 17 00:00:00 2001 From: Max Kellermann Date: Tue, 28 Jan 2020 13:58:51 +0100 Subject: [PATCH 73/76] Scope: fix memory leak by removing duplicate copy_string() call The `ID` constructor also calls copy_string(). --- src/Scope.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Scope.cc b/src/Scope.cc index 662a86fda7..d4a87bc658 100644 --- a/src/Scope.cc +++ b/src/Scope.cc @@ -59,7 +59,7 @@ Scope::~Scope() ID* Scope::GenerateTemporary(const char* name) { - return new ID(copy_string(name), SCOPE_FUNCTION, false); + return new ID(name, SCOPE_FUNCTION, false); } id_list* Scope::GetInits() From aac7f6e8f249cd72a114b33cb67bf4ad81633ca3 Mon Sep 17 00:00:00 2001 From: Jon Siwek Date: Wed, 29 Jan 2020 16:03:12 -0800 Subject: [PATCH 74/76] Set Pipe file descriptor flags correctly --- src/Pipe.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Pipe.cc b/src/Pipe.cc index 9957e25dce..f388366c5d 100644 --- a/src/Pipe.cc +++ b/src/Pipe.cc @@ -92,8 +92,8 @@ Pipe::Pipe(int flags0, int flags1, int status_flags0, int status_flags1, pipe_fail(errno); } - flags[0] = set_flags(fds[0], flags[0]); - flags[1] = set_flags(fds[1], flags[1]); + flags[0] = set_flags(fds[0], flags0); + flags[1] = set_flags(fds[1], flags1); status_flags[0] = set_status_flags(fds[0], status_flags0); status_flags[1] = set_status_flags(fds[1], status_flags1); } From fd2c6c56a5dd693eb3387662dcbed94d4fd4fb26 Mon Sep 17 00:00:00 2001 From: Jon Siwek Date: Wed, 29 Jan 2020 16:04:46 -0800 Subject: [PATCH 75/76] Add checks for failed fcntl calls --- src/Pipe.cc | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/Pipe.cc b/src/Pipe.cc index f388366c5d..5a0916a3dd 100644 --- a/src/Pipe.cc +++ b/src/Pipe.cc @@ -24,6 +24,9 @@ static int set_flags(int fd, int flags) { auto 
rval = fcntl(fd, F_GETFD); + if ( rval == -1 ) + pipe_fail(errno); + if ( flags ) { rval |= flags; @@ -39,6 +42,9 @@ static int unset_flags(int fd, int flags) { auto rval = fcntl(fd, F_GETFD); + if ( rval == -1 ) + pipe_fail(errno); + if ( flags ) { rval &= ~flags; @@ -54,6 +60,9 @@ static int set_status_flags(int fd, int flags) { auto rval = fcntl(fd, F_GETFL); + if ( rval == -1 ) + pipe_fail(errno); + if ( flags ) { rval |= flags; From 948cc32844432413aac192d125805c66f0ca13b5 Mon Sep 17 00:00:00 2001 From: Jon Siwek Date: Wed, 29 Jan 2020 16:05:39 -0800 Subject: [PATCH 76/76] Fix leaked FDs in redirecting supervised node stdout/stderr --- src/supervisor/Supervisor.cc | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/supervisor/Supervisor.cc b/src/supervisor/Supervisor.cc index 4d971a1068..4d41383a85 100644 --- a/src/supervisor/Supervisor.cc +++ b/src/supervisor/Supervisor.cc @@ -1241,6 +1241,8 @@ void Supervisor::SupervisedNode::Init(zeek::Options* options) const strerror(errno)); exit(1); } + + safe_close(fd); } if ( config.stdout_file ) @@ -1256,6 +1258,8 @@ void Supervisor::SupervisedNode::Init(zeek::Options* options) const strerror(errno)); exit(1); } + + safe_close(fd); } if ( config.cpu_affinity )