mirror of
https://github.com/zeek/zeek.git
synced 2025-10-02 14:48:21 +00:00
Use a timer to check for death of supervised node's parent
This commit is contained in:
parent
7ddd311583
commit
dbca14e1fc
6 changed files with 43 additions and 11 deletions
11
src/Net.cc
11
src/Net.cc
|
@ -290,17 +290,6 @@ void net_run()
|
||||||
while ( iosource_mgr->Size() ||
|
while ( iosource_mgr->Size() ||
|
||||||
(BifConst::exit_only_after_terminate && ! terminating) )
|
(BifConst::exit_only_after_terminate && ! terminating) )
|
||||||
{
|
{
|
||||||
// Note: only simple + portable way of detecting loss of parent
|
|
||||||
// process seems to be polling for change in PPID. There's platform
|
|
||||||
// specific ways if we do end up needing something more responsive
|
|
||||||
// and/or have to avoid overhead of polling, but maybe not worth
|
|
||||||
// the additional complexity:
|
|
||||||
// Linux: prctl(PR_SET_PDEATHSIG, ...)
|
|
||||||
// FreeBSD: procctl(PROC_PDEATHSIG_CTL)
|
|
||||||
// TODO: make this a proper timer
|
|
||||||
if ( zeek::supervised_node && zeek::supervised_node->parent_pid != getppid() )
|
|
||||||
zeek_terminate_loop("supervised cluster node was orphaned");
|
|
||||||
|
|
||||||
double ts;
|
double ts;
|
||||||
iosource::IOSource* src = iosource_mgr->FindSoonest(&ts);
|
iosource::IOSource* src = iosource_mgr->FindSoonest(&ts);
|
||||||
|
|
||||||
|
|
|
@ -13,6 +13,7 @@
|
||||||
#include "Reporter.h"
|
#include "Reporter.h"
|
||||||
#include "DebugLogger.h"
|
#include "DebugLogger.h"
|
||||||
#include "Val.h"
|
#include "Val.h"
|
||||||
|
#include "Net.h"
|
||||||
#include "NetVar.h"
|
#include "NetVar.h"
|
||||||
#include "zeek-config.h"
|
#include "zeek-config.h"
|
||||||
#include "util.h"
|
#include "util.h"
|
||||||
|
@ -124,6 +125,29 @@ static std::string make_create_message(const Supervisor::NodeConfig& node)
|
||||||
return fmt("create %s %s", node.name.data(), json_str.data());
|
return fmt("create %s %s", node.name.data(), json_str.data());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Constructs a timer of type TIMER_PPID_CHECK scheduled for time `t`.
// `arg_interval` is stored and later used by Dispatch() as the delay
// (added to network_time) before the next parent-process check.
ParentProcessCheckTimer::ParentProcessCheckTimer(double t, double arg_interval)
	: Timer(t, TIMER_PPID_CHECK),
	  interval(arg_interval)
	{
	}
|
||||||
|
|
||||||
|
void ParentProcessCheckTimer::Dispatch(double t, int is_expire)
|
||||||
|
{
|
||||||
|
// Note: only simple + portable way of detecting loss of parent
|
||||||
|
// process seems to be polling for change in PPID. There's platform
|
||||||
|
// specific ways if we do end up needing something more responsive
|
||||||
|
// and/or have to avoid overhead of polling, but maybe not worth
|
||||||
|
// the additional complexity:
|
||||||
|
// Linux: prctl(PR_SET_PDEATHSIG, ...)
|
||||||
|
// FreeBSD: procctl(PROC_PDEATHSIG_CTL)
|
||||||
|
// Also note the Stem process has its own polling loop with similar logic.
|
||||||
|
if ( zeek::supervised_node->parent_pid != getppid() )
|
||||||
|
zeek_terminate_loop("supervised node was orphaned");
|
||||||
|
|
||||||
|
if ( ! is_expire )
|
||||||
|
timer_mgr->Add(new ParentProcessCheckTimer(network_time + interval,
|
||||||
|
interval));
|
||||||
|
}
|
||||||
|
|
||||||
Supervisor::Supervisor(Supervisor::Config cfg,
|
Supervisor::Supervisor(Supervisor::Config cfg,
|
||||||
std::unique_ptr<bro::PipePair> pipe,
|
std::unique_ptr<bro::PipePair> pipe,
|
||||||
pid_t arg_stem_pid)
|
pid_t arg_stem_pid)
|
||||||
|
@ -714,6 +738,7 @@ std::optional<Supervisor::SupervisedNode> Stem::Poll()
|
||||||
// the additional complexity:
|
// the additional complexity:
|
||||||
// Linux: prctl(PR_SET_PDEATHSIG, ...)
|
// Linux: prctl(PR_SET_PDEATHSIG, ...)
|
||||||
// FreeBSD: procctl(PROC_PDEATHSIG_CTL)
|
// FreeBSD: procctl(PROC_PDEATHSIG_CTL)
|
||||||
|
// Also note the similar polling methodology in ParentProcessCheckTimer.
|
||||||
DBG_STEM("Stem suicide");
|
DBG_STEM("Stem suicide");
|
||||||
Shutdown(13);
|
Shutdown(13);
|
||||||
}
|
}
|
||||||
|
|
|
@ -12,6 +12,7 @@
|
||||||
#include <map>
|
#include <map>
|
||||||
|
|
||||||
#include "iosource/IOSource.h"
|
#include "iosource/IOSource.h"
|
||||||
|
#include "Timer.h"
|
||||||
#include "Pipe.h"
|
#include "Pipe.h"
|
||||||
#include "Flare.h"
|
#include "Flare.h"
|
||||||
#include "NetVar.h"
|
#include "NetVar.h"
|
||||||
|
@ -19,6 +20,18 @@
|
||||||
|
|
||||||
namespace zeek {
|
namespace zeek {
|
||||||
|
|
||||||
|
class ParentProcessCheckTimer : public Timer {
|
||||||
|
public:
|
||||||
|
|
||||||
|
ParentProcessCheckTimer(double t, double arg_interval);
|
||||||
|
|
||||||
|
void Dispatch(double t, int is_expire) override;
|
||||||
|
|
||||||
|
protected:
|
||||||
|
|
||||||
|
double interval;
|
||||||
|
};
|
||||||
|
|
||||||
class Supervisor : public iosource::IOSource {
|
class Supervisor : public iosource::IOSource {
|
||||||
public:
|
public:
|
||||||
|
|
||||||
|
|
|
@ -37,6 +37,7 @@ const char* TimerNames[] = {
|
||||||
"TCPConnectionPartialClose",
|
"TCPConnectionPartialClose",
|
||||||
"TCPConnectionResetTimer",
|
"TCPConnectionResetTimer",
|
||||||
"TriggerTimer",
|
"TriggerTimer",
|
||||||
|
"ParentProcessIDCheck",
|
||||||
"TimerMgrExpireTimer",
|
"TimerMgrExpireTimer",
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
@ -41,6 +41,7 @@ enum TimerType {
|
||||||
TIMER_TCP_PARTIAL_CLOSE,
|
TIMER_TCP_PARTIAL_CLOSE,
|
||||||
TIMER_TCP_RESET,
|
TIMER_TCP_RESET,
|
||||||
TIMER_TRIGGER,
|
TIMER_TRIGGER,
|
||||||
|
TIMER_PPID_CHECK,
|
||||||
TIMER_TIMERMGR_EXPIRE,
|
TIMER_TIMERMGR_EXPIRE,
|
||||||
};
|
};
|
||||||
const int NUM_TIMER_TYPES = int(TIMER_TIMERMGR_EXPIRE) + 1;
|
const int NUM_TIMER_TYPES = int(TIMER_TIMERMGR_EXPIRE) + 1;
|
||||||
|
|
|
@ -1481,6 +1481,9 @@ int main(int argc, char** argv)
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
if ( zeek::supervised_node )
|
||||||
|
timer_mgr->Add(new zeek::ParentProcessCheckTimer(1, 1));
|
||||||
|
|
||||||
double time_net_start = current_time(true);;
|
double time_net_start = current_time(true);;
|
||||||
|
|
||||||
uint64_t mem_net_start_total;
|
uint64_t mem_net_start_total;
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue