diff --git a/src/Supervisor.cc b/src/Supervisor.cc index 0ce4329861..4d9f163ccb 100644 --- a/src/Supervisor.cc +++ b/src/Supervisor.cc @@ -6,6 +6,7 @@ #include #include +#include #include #include "Supervisor.h" @@ -22,6 +23,12 @@ extern "C" { #include "setsignal.h" } +#ifdef DEBUG +#define DBG_STEM(args...) stem->LogDebug(args); +#else +#define DBG_STEM +#endif + using namespace zeek; namespace { @@ -54,6 +61,10 @@ struct Stem { void ReportStatus(const Supervisor::Node& node) const; + void LogDebug(const char* format, ...) const __attribute__((format(printf, 2, 3))); + + void LogError(const char* format, ...) const __attribute__((format(printf, 2, 3))); + std::unique_ptr signal_flare; std::unique_ptr pipe; std::map nodes; @@ -67,7 +78,7 @@ static Stem* stem = nullptr; static RETSIGTYPE stem_sig_handler(int signo) { // TODO: signal safety - printf("Stem received signal: %d\n", signo); + DBG_STEM("Stem received signal: %d", signo); if ( stem->shutting_down ) return RETSIGVAL; @@ -158,6 +169,8 @@ Supervisor::~Supervisor() reporter->Error("Failed to wait for stem process to exit: %s", tmp); } } + + while ( ProcessMessages() != 0 ); } void Supervisor::ObserveChildSignal() @@ -294,7 +307,11 @@ double Supervisor::NextTimestamp(double* local_network_time) void Supervisor::Process() { HandleChildSignal(); + ProcessMessages(); + } +size_t Supervisor::ProcessMessages() + { char buf[256]; int bytes_read = read(stem_pipe->InFD(), buf, 256); @@ -318,9 +335,15 @@ void Supervisor::Process() if ( it != nodes.end() ) it->second.pid = std::stoi(msg_tokens[2]); } + else if ( type == "debug" ) + { + // Already logged the unparsed message above. + } else reporter->Error("Supervisor got unknown msg: %s", msg.data()); } + + return msgs.size(); } Stem::Stem(std::unique_ptr p) @@ -343,8 +366,7 @@ Stem::Stem(std::unique_ptr p) auto res = setpgid(0, 0); if ( res == -1 ) - fprintf(stderr, "failed to set stem process group: %s\n", - strerror(errno)); + LogError("failed to set stem process group: %s", strerror(errno)); } Stem::~Stem() @@ -377,8 +399,8 @@ bool Stem::Wait(Supervisor::Node* node, int options) const if ( res == -1 ) { - fprintf(stderr, "Stem failed to get node exit status %s (%d): %s\n", - node->Name().data(), node->pid, strerror(errno)); + LogError("Stem failed to get node exit status %s (%d): %s", + node->Name().data(), node->pid, strerror(errno)); return false; } @@ -386,18 +408,18 @@ bool Stem::Wait(Supervisor::Node* node, int options) const { node->exit_status = WEXITSTATUS(status); // TODO: may be some cases where the node is intended to exit - printf("node '%s' exited with status %d\n", - node->Name().data(), node->exit_status); + DBG_STEM("node '%s' exited with status %d", + node->Name().data(), node->exit_status); } else if ( WIFSIGNALED(status) ) { node->signal_number = WTERMSIG(status); - printf("node '%s' terminated by signal %d\n", - node->Name().data(), node->signal_number); + DBG_STEM("node '%s' terminated by signal %d", + node->Name().data(), node->signal_number); } else - fprintf(stderr, "Stem failed to get node exit status %s (%d)\n", - node->Name().data(), node->pid); + LogError("Stem failed to get node exit status %s (%d)", + node->Name().data(), node->pid); node->pid = 0; return true; @@ -408,8 +430,8 @@ void Stem::KillNode(const Supervisor::Node& node, int signal) const auto kill_res = kill(node.pid, signal); if ( kill_res == -1 ) - fprintf(stderr, "Failed to send signal to node %s: %s", - node.Name().data(), strerror(errno)); + LogError("Failed to send signal to node %s: %s", + node.Name().data(), strerror(errno)); } void Stem::Destroy(Supervisor::Node* node) const @@ -427,8 +449,8 @@ void Stem::Destroy(Supervisor::Node* node) const if ( Wait(node, WNOHANG) ) break; - printf("Stem waiting to destroy node: %s (%d)\n", - node->Name().data(), node->pid); + DBG_STEM("Stem waiting to destroy node: %s (%d)", + node->Name().data(), node->pid); sleep(kill_delay); } } @@ -482,8 +504,8 @@ bool Stem::Spawn(Supervisor::Node* node) if ( node_pid == -1 ) { - fprintf(stderr, "failed to fork Zeek node '%s': %s\n", - node->Name().data(), strerror(errno)); + LogError("failed to fork Zeek node '%s': %s", + node->Name().data(), strerror(errno)); return false; } @@ -495,7 +517,7 @@ bool Stem::Spawn(Supervisor::Node* node) node->pid = node_pid; node->spawn_time = std::chrono::steady_clock::now(); - printf("Stem spawned node: %s (%d)\n", node->Name().data(), node->pid); + DBG_STEM("Stem spawned node: %s (%d)", node->Name().data(), node->pid); return false; } @@ -529,7 +551,7 @@ void Stem::Shutdown(int exit_code) if ( ! nodes.empty() ) { KillNodes(sig); - printf("Stem killed nodes with signal %d\n", sig); + DBG_STEM("Stem killed nodes with signal %d", sig); usleep(10); Reap(); } @@ -539,8 +561,8 @@ void Stem::Shutdown(int exit_code) if ( nodes_alive == 0 ) exit(exit_code); - printf("Stem nodes still alive %d, sleeping for %d seconds\n", - nodes_alive, kill_delay); + DBG_STEM("Stem nodes still alive %d, sleeping for %d seconds", + nodes_alive, kill_delay); auto sleep_time_left = kill_delay; @@ -567,6 +589,44 @@ void Stem::ReportStatus(const Supervisor::Node& node) const safe_write(pipe->OutFD(), msg.data(), msg.size() + 1); } +void Stem::LogDebug(const char* format, ...) const + { + va_list args; + va_start(args, format); + auto raw_msg = fmt(format, args); + va_end(args); + + if ( getenv("ZEEK_DEBUG_STEM_STDERR") ) + { + // Useful when debugging a breaking change to the IPC mechanism itself. + fprintf(stderr, "%s\n", raw_msg); + return; + } + + std::string msg = "debug "; + msg += raw_msg; + safe_write(pipe->OutFD(), msg.data(), msg.size() + 1); + } + +void Stem::LogError(const char* format, ...) const + { + va_list args; + va_start(args, format); + std::string msg = fmt(format, args); + va_end(args); + + fprintf(stderr, "%s\n", msg.data()); + + #ifdef DEBUG + if ( getenv("ZEEK_DEBUG_STEM_STDERR") ) + // Essentially already emitted above. + return; + + // Useful to also insert the error message into the debug log. + LogDebug("%s", msg.data()); + #endif + } + std::optional Stem::Run() { for ( ; ; ) @@ -591,7 +651,7 @@ std::optional Stem::Poll() { if ( errno != EINTR ) { - fprintf(stderr, "Stem poll() failed: %s\n", strerror(errno)); + LogError("Stem poll() failed: %s", strerror(errno)); return {}; } } @@ -601,7 +661,7 @@ std::optional Stem::Poll() // TODO: better way to detect loss of parent than polling ? // e.g. prctl(PR_SET_PDEATHSIG, ...) on Linux // or procctl(PROC_PDEATHSIG_CTL) on FreeBSD - printf("Stem suicide\n"); + DBG_STEM("Stem suicide"); Shutdown(13); } @@ -634,13 +694,13 @@ std::optional Stem::Poll() if ( bytes_read == 0 ) { // EOF, supervisor must have exited - printf("Stem EOF\n"); + DBG_STEM("Stem EOF"); Shutdown(14); } if ( bytes_read < 0 ) { - fprintf(stderr, "Stem read() failed: %s\n", strerror(errno)); + LogError("Stem read() failed: %s", strerror(errno)); return {}; } @@ -666,8 +726,7 @@ std::optional Stem::Poll() if ( Spawn(&node) ) return node.config; - // TODO: get stem printfs going through standard Zeek debug.log - printf("Stem created node: %s (%d)\n", node.Name().data(), node.pid); + DBG_STEM("Stem created node: %s (%d)", node.Name().data(), node.pid); ReportStatus(node); } else if ( cmd == "destroy" ) @@ -675,7 +734,7 @@ std::optional Stem::Poll() auto it = nodes.find(node_name); assert(it != nodes.end()); auto& node = it->second; - printf("Stem destroying node: %s\n", node_name.data()); + DBG_STEM("Stem destroying node: %s", node_name.data()); Destroy(&node); nodes.erase(it); } @@ -684,7 +743,7 @@ std::optional Stem::Poll() auto it = nodes.find(node_name); assert(it != nodes.end()); auto& node = it->second; - printf("Stem restarting node: %s\n", node_name.data()); + DBG_STEM("Stem restarting node: %s", node_name.data()); Destroy(&node); if ( Spawn(&node) ) @@ -693,7 +752,7 @@ std::optional Stem::Poll() ReportStatus(node); } else - fprintf(stderr, "unknown supervisor message: %s", cmd.data()); + LogError("Stem got unknown supervisor message: %s", cmd.data()); } return {}; diff --git a/src/Supervisor.h b/src/Supervisor.h index 8759c9004f..bcfbdfe568 100644 --- a/src/Supervisor.h +++ b/src/Supervisor.h @@ -103,6 +103,8 @@ private: void Process() override; + size_t ProcessMessages(); + void HandleChildSignal(); void ReapStem(); diff --git a/src/main.cc b/src/main.cc index b6bd67982a..6e8a34fec4 100644 --- a/src/main.cc +++ b/src/main.cc @@ -1067,17 +1067,11 @@ int main(int argc, char** argv) if ( options.debug_log_streams ) { debug_logger.EnableStreams(options.debug_log_streams->data()); - const char* debug_log_name = nullptr; - if ( ! getenv("ZEEK_DEBUG_LOG_STDERR") ) - { - if ( is_supervisor(options) ) - debug_log_name = "debug-supervisor"; - else - debug_log_name = "debug"; - } - - debug_logger.OpenDebugLog(debug_log_name); + if ( getenv("ZEEK_DEBUG_LOG_STDERR") ) + debug_logger.OpenDebugLog(nullptr); + else + debug_logger.OpenDebugLog("debug"); } #endif