Use a timer to check for death of supervised node's parent

This commit is contained in:
Jon Siwek 2020-01-15 15:27:53 -08:00
parent 7ddd311583
commit dbca14e1fc
6 changed files with 43 additions and 11 deletions

View file

@ -290,17 +290,6 @@ void net_run()
while ( iosource_mgr->Size() ||
(BifConst::exit_only_after_terminate && ! terminating) )
{
// Note: only simple + portable way of detecting loss of parent
// process seems to be polling for change in PPID. There's platform
// specific ways if we do end up needing something more responsive
// and/or have to avoid overhead of polling, but maybe not worth
// the additional complexity:
// Linux: prctl(PR_SET_PDEATHSIG, ...)
// FreeBSD: procctl(PROC_PDEATHSIG_CTL)
// TODO: make this a proper timer
if ( zeek::supervised_node && zeek::supervised_node->parent_pid != getppid() )
zeek_terminate_loop("supervised cluster node was orphaned");
double ts;
iosource::IOSource* src = iosource_mgr->FindSoonest(&ts);

View file

@ -13,6 +13,7 @@
#include "Reporter.h"
#include "DebugLogger.h"
#include "Val.h"
#include "Net.h"
#include "NetVar.h"
#include "zeek-config.h"
#include "util.h"
@ -124,6 +125,29 @@ static std::string make_create_message(const Supervisor::NodeConfig& node)
return fmt("create %s %s", node.name.data(), json_str.data());
}
ParentProcessCheckTimer::ParentProcessCheckTimer(double t, double arg_interval)
: Timer(t, TIMER_PPID_CHECK), interval(arg_interval)
{
}
// Timer callback: checks whether the supervised node still has the parent
// process it recorded, then re-arms itself unless the timer is being expired.
//
// Orphan detection works by polling the PPID for a change, which appears to
// be the only simple and portable approach. Platform-specific mechanisms
// exist should something more responsive be needed, or should the polling
// overhead become a concern, though they may not justify the extra
// complexity:
//   Linux:   prctl(PR_SET_PDEATHSIG, ...)
//   FreeBSD: procctl(PROC_PDEATHSIG_CTL)
// The Stem process runs its own polling loop built on similar logic.
//
// t: dispatch time supplied by the caller (not used here).
// is_expire: non-zero when the timer is being expired; no follow-up timer
//    is scheduled in that case.
void ParentProcessCheckTimer::Dispatch(double t, int is_expire)
{
	const auto current_ppid = getppid();

	// A PPID that differs from the recorded one means the original parent
	// is gone.
	if ( current_ppid != zeek::supervised_node->parent_pid )
		zeek_terminate_loop("supervised node was orphaned");

	if ( is_expire )
		return;

	// Queue the next check one interval past the current network time.
	auto* next_check = new ParentProcessCheckTimer(network_time + interval,
	                                               interval);
	timer_mgr->Add(next_check);
}
Supervisor::Supervisor(Supervisor::Config cfg,
std::unique_ptr<bro::PipePair> pipe,
pid_t arg_stem_pid)
@ -714,6 +738,7 @@ std::optional<Supervisor::SupervisedNode> Stem::Poll()
// the additional complexity:
// Linux: prctl(PR_SET_PDEATHSIG, ...)
// FreeBSD: procctl(PROC_PDEATHSIG_CTL)
// Also note the similar polling methodology in ParentProcessCheckTimer.
DBG_STEM("Stem suicide");
Shutdown(13);
}

View file

@ -12,6 +12,7 @@
#include <map>
#include "iosource/IOSource.h"
#include "Timer.h"
#include "Pipe.h"
#include "Flare.h"
#include "NetVar.h"
@ -19,6 +20,18 @@
namespace zeek {
/**
 * A timer that checks on the parent process of a supervised node.
 * Its Dispatch() override is implemented in the corresponding source file.
 */
class ParentProcessCheckTimer : public Timer
{
public:
	/**
	 * @param t  time value passed on to the Timer base class.
	 * @param arg_interval  value stored in the `interval` member.
	 */
	ParentProcessCheckTimer(double t, double arg_interval);

	/**
	 * Timer callback.
	 * @param t  dispatch time supplied by the caller.
	 * @param is_expire  expiration flag supplied by the caller.
	 */
	void Dispatch(double t, int is_expire) override;

protected:
	// Interval value captured at construction.
	double interval;
};
class Supervisor : public iosource::IOSource {
public:

View file

@ -37,6 +37,7 @@ const char* TimerNames[] = {
"TCPConnectionPartialClose",
"TCPConnectionResetTimer",
"TriggerTimer",
"ParentProcessIDCheck",
"TimerMgrExpireTimer",
};

View file

@ -41,6 +41,7 @@ enum TimerType {
TIMER_TCP_PARTIAL_CLOSE,
TIMER_TCP_RESET,
TIMER_TRIGGER,
TIMER_PPID_CHECK,
TIMER_TIMERMGR_EXPIRE,
};
const int NUM_TIMER_TYPES = int(TIMER_TIMERMGR_EXPIRE) + 1;

View file

@ -1481,6 +1481,9 @@ int main(int argc, char** argv)
#endif
if ( zeek::supervised_node )
timer_mgr->Add(new zeek::ParentProcessCheckTimer(1, 1));
double time_net_start = current_time(true);;
uint64_t mem_net_start_total;