mirror of
https://github.com/zeek/zeek.git
synced 2025-10-02 14:48:21 +00:00
Improve supervisor debug logging
Mainly making stem process debug messages sent up to parent supervisor process and included in its debug.log though option to print to stderr remains in form of environment variable in case debugging breaking change to the IPC mechanism itself.
This commit is contained in:
parent
0ff99c3df8
commit
80b3aef486
3 changed files with 95 additions and 40 deletions
|
@ -6,6 +6,7 @@
|
||||||
#include <poll.h>
|
#include <poll.h>
|
||||||
|
|
||||||
#include <csignal>
|
#include <csignal>
|
||||||
|
#include <cstdarg>
|
||||||
#include <sstream>
|
#include <sstream>
|
||||||
|
|
||||||
#include "Supervisor.h"
|
#include "Supervisor.h"
|
||||||
|
@ -22,6 +23,12 @@ extern "C" {
|
||||||
#include "setsignal.h"
|
#include "setsignal.h"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef DEBUG
|
||||||
|
#define DBG_STEM(args...) stem->LogDebug(args);
|
||||||
|
#else
|
||||||
|
#define DBG_STEM
|
||||||
|
#endif
|
||||||
|
|
||||||
using namespace zeek;
|
using namespace zeek;
|
||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
|
@ -54,6 +61,10 @@ struct Stem {
|
||||||
|
|
||||||
void ReportStatus(const Supervisor::Node& node) const;
|
void ReportStatus(const Supervisor::Node& node) const;
|
||||||
|
|
||||||
|
void LogDebug(const char* format, ...) const __attribute__((format(printf, 2, 3)));
|
||||||
|
|
||||||
|
void LogError(const char* format, ...) const __attribute__((format(printf, 2, 3)));
|
||||||
|
|
||||||
std::unique_ptr<bro::Flare> signal_flare;
|
std::unique_ptr<bro::Flare> signal_flare;
|
||||||
std::unique_ptr<bro::PipePair> pipe;
|
std::unique_ptr<bro::PipePair> pipe;
|
||||||
std::map<std::string, Supervisor::Node> nodes;
|
std::map<std::string, Supervisor::Node> nodes;
|
||||||
|
@ -67,7 +78,7 @@ static Stem* stem = nullptr;
|
||||||
static RETSIGTYPE stem_sig_handler(int signo)
|
static RETSIGTYPE stem_sig_handler(int signo)
|
||||||
{
|
{
|
||||||
// TODO: signal safety
|
// TODO: signal safety
|
||||||
printf("Stem received signal: %d\n", signo);
|
DBG_STEM("Stem received signal: %d", signo);
|
||||||
|
|
||||||
if ( stem->shutting_down )
|
if ( stem->shutting_down )
|
||||||
return RETSIGVAL;
|
return RETSIGVAL;
|
||||||
|
@ -158,6 +169,8 @@ Supervisor::~Supervisor()
|
||||||
reporter->Error("Failed to wait for stem process to exit: %s", tmp);
|
reporter->Error("Failed to wait for stem process to exit: %s", tmp);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
while ( ProcessMessages() != 0 );
|
||||||
}
|
}
|
||||||
|
|
||||||
void Supervisor::ObserveChildSignal()
|
void Supervisor::ObserveChildSignal()
|
||||||
|
@ -294,7 +307,11 @@ double Supervisor::NextTimestamp(double* local_network_time)
|
||||||
void Supervisor::Process()
|
void Supervisor::Process()
|
||||||
{
|
{
|
||||||
HandleChildSignal();
|
HandleChildSignal();
|
||||||
|
ProcessMessages();
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t Supervisor::ProcessMessages()
|
||||||
|
{
|
||||||
char buf[256];
|
char buf[256];
|
||||||
int bytes_read = read(stem_pipe->InFD(), buf, 256);
|
int bytes_read = read(stem_pipe->InFD(), buf, 256);
|
||||||
|
|
||||||
|
@ -318,9 +335,15 @@ void Supervisor::Process()
|
||||||
if ( it != nodes.end() )
|
if ( it != nodes.end() )
|
||||||
it->second.pid = std::stoi(msg_tokens[2]);
|
it->second.pid = std::stoi(msg_tokens[2]);
|
||||||
}
|
}
|
||||||
|
else if ( type == "debug" )
|
||||||
|
{
|
||||||
|
// Already logged the unparsed message above.
|
||||||
|
}
|
||||||
else
|
else
|
||||||
reporter->Error("Supervisor got unknown msg: %s", msg.data());
|
reporter->Error("Supervisor got unknown msg: %s", msg.data());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
return msgs.size();
|
||||||
}
|
}
|
||||||
|
|
||||||
Stem::Stem(std::unique_ptr<bro::PipePair> p)
|
Stem::Stem(std::unique_ptr<bro::PipePair> p)
|
||||||
|
@ -343,8 +366,7 @@ Stem::Stem(std::unique_ptr<bro::PipePair> p)
|
||||||
auto res = setpgid(0, 0);
|
auto res = setpgid(0, 0);
|
||||||
|
|
||||||
if ( res == -1 )
|
if ( res == -1 )
|
||||||
fprintf(stderr, "failed to set stem process group: %s\n",
|
LogError("failed to set stem process group: %s", strerror(errno));
|
||||||
strerror(errno));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
Stem::~Stem()
|
Stem::~Stem()
|
||||||
|
@ -377,8 +399,8 @@ bool Stem::Wait(Supervisor::Node* node, int options) const
|
||||||
|
|
||||||
if ( res == -1 )
|
if ( res == -1 )
|
||||||
{
|
{
|
||||||
fprintf(stderr, "Stem failed to get node exit status %s (%d): %s\n",
|
LogError("Stem failed to get node exit status %s (%d): %s",
|
||||||
node->Name().data(), node->pid, strerror(errno));
|
node->Name().data(), node->pid, strerror(errno));
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -386,18 +408,18 @@ bool Stem::Wait(Supervisor::Node* node, int options) const
|
||||||
{
|
{
|
||||||
node->exit_status = WEXITSTATUS(status);
|
node->exit_status = WEXITSTATUS(status);
|
||||||
// TODO: may be some cases where the node is intended to exit
|
// TODO: may be some cases where the node is intended to exit
|
||||||
printf("node '%s' exited with status %d\n",
|
DBG_STEM("node '%s' exited with status %d",
|
||||||
node->Name().data(), node->exit_status);
|
node->Name().data(), node->exit_status);
|
||||||
}
|
}
|
||||||
else if ( WIFSIGNALED(status) )
|
else if ( WIFSIGNALED(status) )
|
||||||
{
|
{
|
||||||
node->signal_number = WTERMSIG(status);
|
node->signal_number = WTERMSIG(status);
|
||||||
printf("node '%s' terminated by signal %d\n",
|
DBG_STEM("node '%s' terminated by signal %d",
|
||||||
node->Name().data(), node->signal_number);
|
node->Name().data(), node->signal_number);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
fprintf(stderr, "Stem failed to get node exit status %s (%d)\n",
|
LogError("Stem failed to get node exit status %s (%d)",
|
||||||
node->Name().data(), node->pid);
|
node->Name().data(), node->pid);
|
||||||
|
|
||||||
node->pid = 0;
|
node->pid = 0;
|
||||||
return true;
|
return true;
|
||||||
|
@ -408,8 +430,8 @@ void Stem::KillNode(const Supervisor::Node& node, int signal) const
|
||||||
auto kill_res = kill(node.pid, signal);
|
auto kill_res = kill(node.pid, signal);
|
||||||
|
|
||||||
if ( kill_res == -1 )
|
if ( kill_res == -1 )
|
||||||
fprintf(stderr, "Failed to send signal to node %s: %s",
|
LogError("Failed to send signal to node %s: %s",
|
||||||
node.Name().data(), strerror(errno));
|
node.Name().data(), strerror(errno));
|
||||||
}
|
}
|
||||||
|
|
||||||
void Stem::Destroy(Supervisor::Node* node) const
|
void Stem::Destroy(Supervisor::Node* node) const
|
||||||
|
@ -427,8 +449,8 @@ void Stem::Destroy(Supervisor::Node* node) const
|
||||||
if ( Wait(node, WNOHANG) )
|
if ( Wait(node, WNOHANG) )
|
||||||
break;
|
break;
|
||||||
|
|
||||||
printf("Stem waiting to destroy node: %s (%d)\n",
|
DBG_STEM("Stem waiting to destroy node: %s (%d)",
|
||||||
node->Name().data(), node->pid);
|
node->Name().data(), node->pid);
|
||||||
sleep(kill_delay);
|
sleep(kill_delay);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -482,8 +504,8 @@ bool Stem::Spawn(Supervisor::Node* node)
|
||||||
|
|
||||||
if ( node_pid == -1 )
|
if ( node_pid == -1 )
|
||||||
{
|
{
|
||||||
fprintf(stderr, "failed to fork Zeek node '%s': %s\n",
|
LogError("failed to fork Zeek node '%s': %s",
|
||||||
node->Name().data(), strerror(errno));
|
node->Name().data(), strerror(errno));
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -495,7 +517,7 @@ bool Stem::Spawn(Supervisor::Node* node)
|
||||||
|
|
||||||
node->pid = node_pid;
|
node->pid = node_pid;
|
||||||
node->spawn_time = std::chrono::steady_clock::now();
|
node->spawn_time = std::chrono::steady_clock::now();
|
||||||
printf("Stem spawned node: %s (%d)\n", node->Name().data(), node->pid);
|
DBG_STEM("Stem spawned node: %s (%d)", node->Name().data(), node->pid);
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -529,7 +551,7 @@ void Stem::Shutdown(int exit_code)
|
||||||
if ( ! nodes.empty() )
|
if ( ! nodes.empty() )
|
||||||
{
|
{
|
||||||
KillNodes(sig);
|
KillNodes(sig);
|
||||||
printf("Stem killed nodes with signal %d\n", sig);
|
DBG_STEM("Stem killed nodes with signal %d", sig);
|
||||||
usleep(10);
|
usleep(10);
|
||||||
Reap();
|
Reap();
|
||||||
}
|
}
|
||||||
|
@ -539,8 +561,8 @@ void Stem::Shutdown(int exit_code)
|
||||||
if ( nodes_alive == 0 )
|
if ( nodes_alive == 0 )
|
||||||
exit(exit_code);
|
exit(exit_code);
|
||||||
|
|
||||||
printf("Stem nodes still alive %d, sleeping for %d seconds\n",
|
DBG_STEM("Stem nodes still alive %d, sleeping for %d seconds",
|
||||||
nodes_alive, kill_delay);
|
nodes_alive, kill_delay);
|
||||||
|
|
||||||
auto sleep_time_left = kill_delay;
|
auto sleep_time_left = kill_delay;
|
||||||
|
|
||||||
|
@ -567,6 +589,44 @@ void Stem::ReportStatus(const Supervisor::Node& node) const
|
||||||
safe_write(pipe->OutFD(), msg.data(), msg.size() + 1);
|
safe_write(pipe->OutFD(), msg.data(), msg.size() + 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void Stem::LogDebug(const char* format, ...) const
|
||||||
|
{
|
||||||
|
va_list args;
|
||||||
|
va_start(args, format);
|
||||||
|
auto raw_msg = fmt(format, args);
|
||||||
|
va_end(args);
|
||||||
|
|
||||||
|
if ( getenv("ZEEK_DEBUG_STEM_STDERR") )
|
||||||
|
{
|
||||||
|
// Useful when debugging a breaking change to the IPC mechanism itself.
|
||||||
|
fprintf(stderr, "%s\n", raw_msg);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::string msg = "debug ";
|
||||||
|
msg += raw_msg;
|
||||||
|
safe_write(pipe->OutFD(), msg.data(), msg.size() + 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
void Stem::LogError(const char* format, ...) const
|
||||||
|
{
|
||||||
|
va_list args;
|
||||||
|
va_start(args, format);
|
||||||
|
std::string msg = fmt(format, args);
|
||||||
|
va_end(args);
|
||||||
|
|
||||||
|
fprintf(stderr, "%s\n", msg.data());
|
||||||
|
|
||||||
|
#ifdef DEBUG
|
||||||
|
if ( getenv("ZEEK_DEBUG_STEM_STDERR") )
|
||||||
|
// Essentially already emitted above.
|
||||||
|
return;
|
||||||
|
|
||||||
|
// Useful to also insert the error message into the debug log.
|
||||||
|
LogDebug("%s", msg.data());
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
std::optional<Supervisor::NodeConfig> Stem::Run()
|
std::optional<Supervisor::NodeConfig> Stem::Run()
|
||||||
{
|
{
|
||||||
for ( ; ; )
|
for ( ; ; )
|
||||||
|
@ -591,7 +651,7 @@ std::optional<Supervisor::NodeConfig> Stem::Poll()
|
||||||
{
|
{
|
||||||
if ( errno != EINTR )
|
if ( errno != EINTR )
|
||||||
{
|
{
|
||||||
fprintf(stderr, "Stem poll() failed: %s\n", strerror(errno));
|
LogError("Stem poll() failed: %s", strerror(errno));
|
||||||
return {};
|
return {};
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -601,7 +661,7 @@ std::optional<Supervisor::NodeConfig> Stem::Poll()
|
||||||
// TODO: better way to detect loss of parent than polling ?
|
// TODO: better way to detect loss of parent than polling ?
|
||||||
// e.g. prctl(PR_SET_PDEATHSIG, ...) on Linux
|
// e.g. prctl(PR_SET_PDEATHSIG, ...) on Linux
|
||||||
// or procctl(PROC_PDEATHSIG_CTL) on FreeBSD
|
// or procctl(PROC_PDEATHSIG_CTL) on FreeBSD
|
||||||
printf("Stem suicide\n");
|
DBG_STEM("Stem suicide");
|
||||||
Shutdown(13);
|
Shutdown(13);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -634,13 +694,13 @@ std::optional<Supervisor::NodeConfig> Stem::Poll()
|
||||||
if ( bytes_read == 0 )
|
if ( bytes_read == 0 )
|
||||||
{
|
{
|
||||||
// EOF, supervisor must have exited
|
// EOF, supervisor must have exited
|
||||||
printf("Stem EOF\n");
|
DBG_STEM("Stem EOF");
|
||||||
Shutdown(14);
|
Shutdown(14);
|
||||||
}
|
}
|
||||||
|
|
||||||
if ( bytes_read < 0 )
|
if ( bytes_read < 0 )
|
||||||
{
|
{
|
||||||
fprintf(stderr, "Stem read() failed: %s\n", strerror(errno));
|
LogError("Stem read() failed: %s", strerror(errno));
|
||||||
return {};
|
return {};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -666,8 +726,7 @@ std::optional<Supervisor::NodeConfig> Stem::Poll()
|
||||||
if ( Spawn(&node) )
|
if ( Spawn(&node) )
|
||||||
return node.config;
|
return node.config;
|
||||||
|
|
||||||
// TODO: get stem printfs going through standard Zeek debug.log
|
DBG_STEM("Stem created node: %s (%d)", node.Name().data(), node.pid);
|
||||||
printf("Stem created node: %s (%d)\n", node.Name().data(), node.pid);
|
|
||||||
ReportStatus(node);
|
ReportStatus(node);
|
||||||
}
|
}
|
||||||
else if ( cmd == "destroy" )
|
else if ( cmd == "destroy" )
|
||||||
|
@ -675,7 +734,7 @@ std::optional<Supervisor::NodeConfig> Stem::Poll()
|
||||||
auto it = nodes.find(node_name);
|
auto it = nodes.find(node_name);
|
||||||
assert(it != nodes.end());
|
assert(it != nodes.end());
|
||||||
auto& node = it->second;
|
auto& node = it->second;
|
||||||
printf("Stem destroying node: %s\n", node_name.data());
|
DBG_STEM("Stem destroying node: %s", node_name.data());
|
||||||
Destroy(&node);
|
Destroy(&node);
|
||||||
nodes.erase(it);
|
nodes.erase(it);
|
||||||
}
|
}
|
||||||
|
@ -684,7 +743,7 @@ std::optional<Supervisor::NodeConfig> Stem::Poll()
|
||||||
auto it = nodes.find(node_name);
|
auto it = nodes.find(node_name);
|
||||||
assert(it != nodes.end());
|
assert(it != nodes.end());
|
||||||
auto& node = it->second;
|
auto& node = it->second;
|
||||||
printf("Stem restarting node: %s\n", node_name.data());
|
DBG_STEM("Stem restarting node: %s", node_name.data());
|
||||||
Destroy(&node);
|
Destroy(&node);
|
||||||
|
|
||||||
if ( Spawn(&node) )
|
if ( Spawn(&node) )
|
||||||
|
@ -693,7 +752,7 @@ std::optional<Supervisor::NodeConfig> Stem::Poll()
|
||||||
ReportStatus(node);
|
ReportStatus(node);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
fprintf(stderr, "unknown supervisor message: %s", cmd.data());
|
LogError("Stem got unknown supervisor message: %s", cmd.data());
|
||||||
}
|
}
|
||||||
|
|
||||||
return {};
|
return {};
|
||||||
|
|
|
@ -103,6 +103,8 @@ private:
|
||||||
|
|
||||||
void Process() override;
|
void Process() override;
|
||||||
|
|
||||||
|
size_t ProcessMessages();
|
||||||
|
|
||||||
void HandleChildSignal();
|
void HandleChildSignal();
|
||||||
|
|
||||||
void ReapStem();
|
void ReapStem();
|
||||||
|
|
14
src/main.cc
14
src/main.cc
|
@ -1067,17 +1067,11 @@ int main(int argc, char** argv)
|
||||||
if ( options.debug_log_streams )
|
if ( options.debug_log_streams )
|
||||||
{
|
{
|
||||||
debug_logger.EnableStreams(options.debug_log_streams->data());
|
debug_logger.EnableStreams(options.debug_log_streams->data());
|
||||||
const char* debug_log_name = nullptr;
|
|
||||||
|
|
||||||
if ( ! getenv("ZEEK_DEBUG_LOG_STDERR") )
|
if ( getenv("ZEEK_DEBUG_LOG_STDERR") )
|
||||||
{
|
debug_logger.OpenDebugLog(nullptr);
|
||||||
if ( is_supervisor(options) )
|
else
|
||||||
debug_log_name = "debug-supervisor";
|
debug_logger.OpenDebugLog("debug");
|
||||||
else
|
|
||||||
debug_log_name = "debug";
|
|
||||||
}
|
|
||||||
|
|
||||||
debug_logger.OpenDebugLog(debug_log_name);
|
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue