Add supervisor btests

This commit is contained in:
Jon Siwek 2020-01-16 19:21:53 -08:00
parent 21c75b46eb
commit 1972190b89
30 changed files with 546 additions and 14 deletions

View file

@ -32,7 +32,7 @@ export {
type NodeStatus: record {
node: NodeConfig;
pid: count;
pid: int &optional;
};
type Status: record {

View file

@ -50,9 +50,9 @@ struct Stem {
int AliveNodeCount() const;
void KillNodes(int signal) const;
void KillNodes(int signal);
void KillNode(const Supervisor::Node& node, int signal) const;
void KillNode(Supervisor::Node* node, int signal) const;
void Destroy(Supervisor::Node* node) const;
@ -476,7 +476,7 @@ bool Stem::Wait(Supervisor::Node* node, int options) const
DBG_STEM("node '%s' (PID %d) exited with status %d",
node->Name().data(), node->pid, node->exit_status);
if ( ! shutting_down )
if ( ! node->killed )
LogError("Supervised node '%s' (PID %d) exited prematurely with status %d",
node->Name().data(), node->pid, node->exit_status);
}
@ -486,7 +486,7 @@ bool Stem::Wait(Supervisor::Node* node, int options) const
DBG_STEM("node '%s' (PID %d) terminated by signal %d",
node->Name().data(), node->pid, node->signal_number);
if ( ! shutting_down )
if ( ! node->killed )
LogError("Supervised node '%s' (PID %d) terminated prematurely by signal %d",
node->Name().data(), node->pid, node->signal_number);
}
@ -498,13 +498,14 @@ bool Stem::Wait(Supervisor::Node* node, int options) const
return true;
}
void Stem::KillNode(const Supervisor::Node& node, int signal) const
void Stem::KillNode(Supervisor::Node* node, int signal) const
{
auto kill_res = kill(node.pid, signal);
node->killed = true;
auto kill_res = kill(node->pid, signal);
if ( kill_res == -1 )
LogError("Failed to send signal to node '%s' (PID %d): %s",
node.Name().data(), node.pid, strerror(errno));
node->Name().data(), node->pid, strerror(errno));
}
void Stem::Destroy(Supervisor::Node* node) const
@ -516,7 +517,7 @@ void Stem::Destroy(Supervisor::Node* node) const
for ( ; ; )
{
auto sig = kill_attempts++ < max_term_attempts ? SIGTERM : SIGKILL;
KillNode(*node, sig);
KillNode(node, sig);
usleep(10);
if ( Wait(node, WNOHANG) )
@ -615,10 +616,10 @@ int Stem::AliveNodeCount() const
return rval;
}
void Stem::KillNodes(int signal) const
void Stem::KillNodes(int signal)
{
for ( const auto& n : nodes )
KillNode(n.second, signal);
for ( auto& n : nodes )
KillNode(&n.second, signal);
}
void Stem::Shutdown(int exit_code)
@ -1061,7 +1062,7 @@ IntrusivePtr<RecordVal> Supervisor::Node::ToRecord() const
rval->Assign(rt->FieldOffset("node"), config.ToRecord().detach());
if ( pid )
rval->Assign(rt->FieldOffset("pid"), val_mgr->GetCount(pid));
rval->Assign(rt->FieldOffset("pid"), val_mgr->GetInt(pid));
return rval;
}

View file

@ -81,6 +81,7 @@ public:
NodeConfig config;
pid_t pid = 0;
bool killed = false;
int exit_status = 0;
int signal_number = 0;
int revival_attempts = 0;

View file

@ -0,0 +1,2 @@
supervised node zeek_init(), logger-1, Cluster::LOGGER
supervised node zeek_done(), logger-1

View file

@ -0,0 +1,2 @@
supervised node zeek_init(), manager, Cluster::MANAGER
supervised node zeek_done(), manager

View file

@ -0,0 +1,2 @@
supervised node zeek_init(), proxy-1, Cluster::PROXY
supervised node zeek_done(), proxy-1

View file

@ -0,0 +1,3 @@
supervisor zeek_init()
shutting down
supervisor zeek_done()

View file

@ -0,0 +1,2 @@
supervised node zeek_init(), worker-1, Cluster::WORKER
supervised node zeek_done(), worker-1

View file

@ -0,0 +1,2 @@
supervised node zeek_init()
supervised node zeek_done()

View file

@ -0,0 +1,3 @@
supervisor zeek_init()
destroying node
supervisor zeek_done()

View file

@ -0,0 +1,3 @@
(stderr) supervised node zeek_init()
received termination signal
(stderr) supervised node zeek_done()

View file

@ -0,0 +1,2 @@
(stdout) supervised node zeek_init()
(stdout) supervised node zeek_done()

View file

@ -0,0 +1,3 @@
supervisor zeek_init()
destroying node
supervisor zeek_done()

View file

@ -0,0 +1,3 @@
supervised node zeek_init()
supervised node loaded qux.zeek
supervised node zeek_done()

View file

@ -0,0 +1,3 @@
supervisor zeek_init()
destroying node
supervisor zeek_done()

View file

@ -0,0 +1,2 @@
supervised node zeek_init()
supervised node zeek_done()

View file

@ -0,0 +1,2 @@
supervisor zeek_init()
supervisor zeek_done()

View file

@ -0,0 +1,2 @@
supervised node zeek_init()
supervised node zeek_done()

View file

@ -0,0 +1,3 @@
supervisor zeek_init()
destroying node
supervisor zeek_done()

View file

@ -0,0 +1,3 @@
got supervised node status, grault
got supervised node status, grault
got supervised node status, grault

View file

@ -0,0 +1 @@
got supervised node status, grault

View file

@ -1,5 +1,5 @@
[btest]
TestDirs = doc bifs language core scripts coverage signatures plugins broker
TestDirs = doc bifs language core scripts coverage signatures plugins broker supervisor
TmpDir = %(testbase)s/.tmp
BaselineDir = %(testbase)s/Baseline
IgnoreDirs = .svn CVS .tmp

View file

@ -0,0 +1,89 @@
# @TEST-PORT: SUPERVISOR_PORT
# @TEST-PORT: MANAGER_PORT
# @TEST-PORT: LOGGER_PORT
# @TEST-PORT: PROXY_PORT
# @TEST-PORT: WORKER_PORT
# @TEST-EXEC: btest-bg-run zeek zeek -j -b %INPUT
# @TEST-EXEC: btest-bg-wait 20
# @TEST-EXEC: btest-diff zeek/supervisor.out
# @TEST-EXEC: btest-diff zeek/manager/stdout
# @TEST-EXEC: btest-diff zeek/logger-1/stdout
# @TEST-EXEC: btest-diff zeek/worker-1/stdout
# @TEST-EXEC: btest-diff zeek/proxy-1/stdout
@load base/frameworks/cluster
# So the supervised node doesn't terminate right away.
redef exit_only_after_terminate=T;
global supervisor_output_file: file;
global topic = "test-topic";
global peer_count = 0;
# Handler for the `shutdown` event that the manager node publishes on `topic`
# after its fourth peer is added. The supervisor subscribes to `topic` in
# zeek_init(), so this is expected to run there: it records the shutdown in
# the supervisor's output file and then terminates the process.
event shutdown()
{
print supervisor_output_file, "shutting down";
terminate();
}
event zeek_init()
	{
	if ( ! Supervisor::is_supervisor() )
		{
		# Supervised node: peer with the supervisor's Broker endpoint and
		# report which cluster node/role this process came up as.
		Broker::peer("127.0.0.1", to_port(getenv("SUPERVISOR_PORT")));
		print "supervised node zeek_init()", Cluster::node, Cluster::local_node_type();
		return;
		}

	# Supervisor: subscribe for the shutdown request, start listening and
	# open the file that the test later diffs.
	Broker::subscribe(topic);
	Broker::listen("127.0.0.1", to_port(getenv("SUPERVISOR_PORT")));
	supervisor_output_file = open("supervisor.out");
	print supervisor_output_file, "supervisor zeek_init()";

	# One endpoint per cluster role, each on its own test-assigned port.
	local cluster: table[string] of Supervisor::ClusterEndpoint;
	cluster["manager"] = [$role=Supervisor::MANAGER, $host=127.0.0.1,
	                      $p=to_port(getenv("MANAGER_PORT"))];
	cluster["logger-1"] = [$role=Supervisor::LOGGER, $host=127.0.0.1,
	                       $p=to_port(getenv("LOGGER_PORT"))];
	cluster["proxy-1"] = [$role=Supervisor::PROXY, $host=127.0.0.1,
	                      $p=to_port(getenv("PROXY_PORT"))];
	cluster["worker-1"] = [$role=Supervisor::WORKER, $host=127.0.0.1,
	                       $p=to_port(getenv("WORKER_PORT"))];

	# Create one supervised node per endpoint; every node gets the full
	# cluster layout and a directory named after itself.
	for ( n, ep in cluster )
		{
		local node_cfg = Supervisor::NodeConfig($name=n, $directory=n,
		                                        $stdout_file="stdout",
		                                        $stderr_file="stderr");
		node_cfg$cluster = cluster;

		local err = Supervisor::create(node_cfg);

		if ( err != "" )
			print fmt("failed to create node %s: %s", n, err);
		}
	}
event Broker::peer_added(endpoint: Broker::EndpointInfo, msg: string)
	{
	# Every process counts its peers, but only the supervised manager acts
	# on the count: once its fourth peer is added it requests the shutdown.
	++peer_count;

	if ( Supervisor::is_supervised() && Cluster::node == "manager" &&
	     peer_count == 4 )
		Broker::publish(topic, shutdown);
	}
# Empty handler: no action is taken in this test when a Broker peer is lost.
event Broker::peer_lost(endpoint: Broker::EndpointInfo, msg: string)
{
}
event zeek_done()
	{
	# The supervisor logs to its own file; supervised nodes log to stdout
	# together with their cluster node name.
	if ( ! Supervisor::is_supervised() )
		{
		print supervisor_output_file, "supervisor zeek_done()";
		return;
		}

	print "supervised node zeek_done()", Cluster::node;
	}

View file

@ -0,0 +1,60 @@
# @TEST-PORT: BROKER_PORT
# @TEST-EXEC: btest-bg-run zeek zeek -j -b %INPUT
# @TEST-EXEC: btest-bg-wait 20
# @TEST-EXEC: btest-diff zeek/supervisor.out
# @TEST-EXEC: btest-diff zeek/qux/node.out
# So the supervised node doesn't terminate right away.
redef exit_only_after_terminate=T;
global supervisor_output_file: file;
global node_output_file: file;
global topic = "test-topic";
# Handler for the `do_destroy` event that the supervised node publishes on
# `topic` once its peering is established. Logs to the supervisor's output
# file and asks the supervisor framework to destroy the node named "grault".
event do_destroy()
{
print supervisor_output_file, "destroying node";
Supervisor::destroy("grault");
}
event zeek_init()
	{
	if ( ! Supervisor::is_supervisor() )
		{
		# Supervised node: peer with the supervisor and open the node's own
		# output file (relative to whatever directory the node runs in).
		Broker::peer("127.0.0.1", to_port(getenv("BROKER_PORT")));
		node_output_file = open("node.out");
		print node_output_file, "supervised node zeek_init()";
		return;
		}

	# Supervisor: get ready to receive do_destroy, then create the node
	# with its working directory set to "qux".
	Broker::subscribe(topic);
	Broker::listen("127.0.0.1", to_port(getenv("BROKER_PORT")));
	supervisor_output_file = open("supervisor.out");
	print supervisor_output_file, "supervisor zeek_init()";

	local node_cfg = Supervisor::NodeConfig($name="grault", $directory="qux");
	local err = Supervisor::create(node_cfg);

	# A non-empty result is an error message; keep it in the diffed output.
	if ( err != "" )
		print supervisor_output_file, err;
	}
event Broker::peer_added(endpoint: Broker::EndpointInfo, msg: string)
	{
	# Only the supervised node reacts: once peered, it asks for its own
	# destruction by publishing do_destroy on the shared topic.
	if ( ! Supervisor::is_supervised() )
		return;

	Broker::publish(topic, do_destroy);
	}
# Terminates this process as soon as a Broker peer is lost.
event Broker::peer_lost(endpoint: Broker::EndpointInfo, msg: string)
{
# Expected to run only in the supervisor (presumably once the destroyed
# node's connection goes away -- confirm).
terminate();
}
event zeek_done()
	{
	# Each process records its exit in its own output file.
	if ( ! Supervisor::is_supervised() )
		{
		print supervisor_output_file, "supervisor zeek_done()";
		return;
		}

	print node_output_file, "supervised node zeek_done()";
	}

View file

@ -0,0 +1,66 @@
# @TEST-PORT: BROKER_PORT
# @TEST-EXEC: btest-bg-run zeek zeek -j -b %INPUT
# @TEST-EXEC: btest-bg-wait 20
# @TEST-EXEC: btest-diff zeek/supervisor.out
# @TEST-EXEC: btest-diff zeek/qux/grault.stdout
# @TEST-EXEC: btest-diff zeek/qux/grault.stderr
# So the supervised node doesn't terminate right away.
redef exit_only_after_terminate=T;
global supervisor_output_file: file;
global topic = "test-topic";
global stderr = open("/dev/stderr");
# Handler for the `do_destroy` event that the supervised node publishes on
# `topic` once its peering is established. Logs to the supervisor's output
# file and asks the supervisor framework to destroy the node named "grault".
event do_destroy()
{
print supervisor_output_file, "destroying node";
Supervisor::destroy("grault");
}
event zeek_init()
	{
	if ( ! Supervisor::is_supervisor() )
		{
		# Supervised node: peer with the supervisor and emit one line on
		# each standard stream so both redirections can be checked.
		Broker::peer("127.0.0.1", to_port(getenv("BROKER_PORT")));
		print "(stdout) supervised node zeek_init()";
		print stderr, "(stderr) supervised node zeek_init()";
		return;
		}

	# Supervisor: get ready to receive do_destroy, then create a node
	# whose stdout/stderr are redirected to files inside "qux".
	Broker::subscribe(topic);
	Broker::listen("127.0.0.1", to_port(getenv("BROKER_PORT")));
	supervisor_output_file = open("supervisor.out");
	print supervisor_output_file, "supervisor zeek_init()";

	local node_cfg = Supervisor::NodeConfig($name="grault", $directory="qux",
	                                        $stdout_file="grault.stdout",
	                                        $stderr_file="grault.stderr");
	local err = Supervisor::create(node_cfg);

	# A non-empty result is an error message; keep it in the diffed output.
	if ( err != "" )
		print supervisor_output_file, err;
	}
event Broker::peer_added(endpoint: Broker::EndpointInfo, msg: string)
	{
	# Only the supervised node reacts: once peered, it asks for its own
	# destruction by publishing do_destroy on the shared topic.
	if ( ! Supervisor::is_supervised() )
		return;

	Broker::publish(topic, do_destroy);
	}
# Terminates this process as soon as a Broker peer is lost.
event Broker::peer_lost(endpoint: Broker::EndpointInfo, msg: string)
{
# Expected to run only in the supervisor (presumably once the destroyed
# node's connection goes away -- confirm).
terminate();
}
event zeek_done()
	{
	if ( ! Supervisor::is_supervised() )
		{
		print supervisor_output_file, "supervisor zeek_done()";
		return;
		}

	# Supervised node: one final line on each standard stream.
	print "(stdout) supervised node zeek_done()";
	print stderr, "(stderr) supervised node zeek_done()";
	}

View file

@ -0,0 +1,70 @@
# @TEST-PORT: BROKER_PORT
# @TEST-EXEC: btest-bg-run zeek zeek -j -b %INPUT
# @TEST-EXEC: btest-bg-wait 20
# @TEST-EXEC: btest-diff zeek/supervisor.out
# @TEST-EXEC: btest-diff zeek/node.out
# So the supervised node doesn't terminate right away.
redef exit_only_after_terminate=T;
global supervisor_output_file: file;
global node_output_file: file;
global topic = "test-topic";
# Handler for the `do_destroy` event that the supervised node publishes on
# `topic` once its peering is established. Logs to the supervisor's output
# file and asks the supervisor framework to destroy the node named "grault".
event do_destroy()
{
print supervisor_output_file, "destroying node";
Supervisor::destroy("grault");
}
event zeek_init()
	{
	if ( ! Supervisor::is_supervisor() )
		{
		# Supervised node: peer with the supervisor and open the file that
		# this script and the additionally loaded script both print to.
		Broker::peer("127.0.0.1", to_port(getenv("BROKER_PORT")));
		node_output_file = open("node.out");
		print node_output_file, "supervised node zeek_init()";
		return;
		}

	# Supervisor: get ready to receive do_destroy, then create a node
	# that is configured with an extra script via $scripts.
	Broker::subscribe(topic);
	Broker::listen("127.0.0.1", to_port(getenv("BROKER_PORT")));
	supervisor_output_file = open("supervisor.out");
	print supervisor_output_file, "supervisor zeek_init()";

	local node_cfg = Supervisor::NodeConfig($name="grault",
	                                        $scripts=vector("../qux.zeek"));
	local err = Supervisor::create(node_cfg);

	# A non-empty result is an error message; keep it in the diffed output.
	if ( err != "" )
		print supervisor_output_file, err;
	}
event Broker::peer_added(endpoint: Broker::EndpointInfo, msg: string)
	{
	# Only the supervised node reacts: once peered, it asks for its own
	# destruction by publishing do_destroy on the shared topic.
	if ( ! Supervisor::is_supervised() )
		return;

	Broker::publish(topic, do_destroy);
	}
# Terminates this process as soon as a Broker peer is lost.
event Broker::peer_lost(endpoint: Broker::EndpointInfo, msg: string)
{
# Expected to run only in the supervisor (presumably once the destroyed
# node's connection goes away -- confirm).
terminate();
}
event zeek_done()
	{
	# Each process records its exit in its own output file.
	if ( ! Supervisor::is_supervised() )
		{
		print supervisor_output_file, "supervisor zeek_done()";
		return;
		}

	print node_output_file, "supervised node zeek_done()";
	}
@TEST-START-FILE qux.zeek
# zeek_init handler of the extra script handed to the node via $scripts.
# It prints to node_output_file, which the main script opens in its own
# zeek_init; the low priority (-10) presumably makes this handler run after
# that one -- confirm.
event zeek_init() &priority=-10
{
print node_output_file, "supervised node loaded qux.zeek";
}
@TEST-END-FILE

View file

@ -0,0 +1,42 @@
# @TEST-EXEC: btest-bg-run zeek zeek -j -b %INPUT
# @TEST-EXEC: btest-bg-wait 20
# @TEST-EXEC: btest-diff zeek/supervisor.out
# @TEST-EXEC: btest-diff zeek/node.out
# So the supervised node doesn't terminate right away.
redef exit_only_after_terminate=T;
global supervisor_output_file: file;
global node_output_file: file;
# Supervisor: writes its own PID to a file and creates the node "grault".
# Supervised node: reads that PID file and sends the supervisor a
# termination signal, exercising the supervisor's shutdown path.
event zeek_init()
	{
	# Both sides refer to the same relative path; the node is created without
	# a $directory, so it presumably shares the supervisor's working directory.
	local pid_file = "supervisor.pid";

	if ( Supervisor::is_supervisor() )
		{
		supervisor_output_file = open("supervisor.out");
		print supervisor_output_file, "supervisor zeek_init()";

		# Write the PID and close the file right away so the contents are
		# flushed before the supervised node exists and can try to read
		# them, instead of depending on when the handle happens to be
		# released.
		local f = open(pid_file);
		print f, getpid();
		close(f);

		local sn = Supervisor::NodeConfig($name="grault");
		local res = Supervisor::create(sn);

		# A non-empty result is an error message; keep it in the diffed output.
		if ( res != "" )
			print supervisor_output_file, res;
		}
	else
		{
		node_output_file = open("node.out");
		print node_output_file, "supervised node zeek_init()";

		# `kill` with no signal option sends its default signal to the
		# supervisor PID read back from the file.
		system(fmt("kill `cat %s`", pid_file));
		}
	}
event zeek_done()
	{
	# Each process records its exit in its own output file.
	if ( ! Supervisor::is_supervised() )
		{
		print supervisor_output_file, "supervisor zeek_done()";
		return;
		}

	print node_output_file, "supervised node zeek_done()";
	}

View file

@ -0,0 +1,60 @@
# @TEST-PORT: BROKER_PORT
# @TEST-EXEC: btest-bg-run zeek zeek -j -b %INPUT
# @TEST-EXEC: btest-bg-wait 20
# @TEST-EXEC: btest-diff zeek/supervisor.out
# @TEST-EXEC: btest-diff zeek/node.out
# So the supervised node doesn't terminate right away.
redef exit_only_after_terminate=T;
global supervisor_output_file: file;
global node_output_file: file;
global topic = "test-topic";
# Handler for the `do_destroy` event that the supervised node publishes on
# `topic` once its peering is established. Logs to the supervisor's output
# file and asks the supervisor framework to destroy the node named "grault".
event do_destroy()
{
print supervisor_output_file, "destroying node";
Supervisor::destroy("grault");
}
event zeek_init()
	{
	if ( ! Supervisor::is_supervisor() )
		{
		# Supervised node: peer with the supervisor and open its own
		# output file.
		Broker::peer("127.0.0.1", to_port(getenv("BROKER_PORT")));
		node_output_file = open("node.out");
		print node_output_file, "supervised node zeek_init()";
		return;
		}

	# Supervisor: get ready to receive do_destroy, then create the node
	# using nothing but a name.
	Broker::subscribe(topic);
	Broker::listen("127.0.0.1", to_port(getenv("BROKER_PORT")));
	supervisor_output_file = open("supervisor.out");
	print supervisor_output_file, "supervisor zeek_init()";

	local node_cfg = Supervisor::NodeConfig($name="grault");
	local err = Supervisor::create(node_cfg);

	# A non-empty result is an error message; keep it in the diffed output.
	if ( err != "" )
		print supervisor_output_file, err;
	}
event Broker::peer_added(endpoint: Broker::EndpointInfo, msg: string)
	{
	# Only the supervised node reacts: once peered, it asks for its own
	# destruction by publishing do_destroy on the shared topic.
	if ( ! Supervisor::is_supervised() )
		return;

	Broker::publish(topic, do_destroy);
	}
# Terminates this process as soon as a Broker peer is lost.
event Broker::peer_lost(endpoint: Broker::EndpointInfo, msg: string)
{
# Expected to run only in the supervisor (presumably once the destroyed
# node's connection goes away -- confirm).
terminate();
}
event zeek_done()
	{
	# Each process records its exit in its own output file.
	if ( ! Supervisor::is_supervised() )
		{
		print supervisor_output_file, "supervisor zeek_done()";
		return;
		}

	print node_output_file, "supervised node zeek_done()";
	}

View file

@ -0,0 +1,66 @@
# @TEST-EXEC: btest-bg-run zeek zeek -j -b %INPUT
# @TEST-EXEC: btest-bg-wait 20
# @TEST-EXEC: btest-diff zeek/.stdout
# So the supervised node doesn't terminate right away.
redef exit_only_after_terminate=T;
global node_pid: int = 0;
global status_count = 0;
global check_interval = 0.1sec;
event check_status(name: string &default="")
	{
	local sup_status = Supervisor::status(name);
	local node_status = sup_status$nodes["grault"];

	# Keep polling while the node has no PID yet, or while it still reports
	# the PID that was already counted.
	if ( ! node_status?$pid ||
	     ( status_count > 0 && node_pid == node_status$pid ) )
		{
		schedule check_interval { check_status() };
		return;
		}

	# A PID not counted before: report it and remember it.
	print "got supervised node status", node_status$node$name;
	node_pid = node_status$pid;
	++status_count;

	switch ( status_count ) {
	case 1:
		# First report: restart without naming a node, then poll with the
		# default (empty) name.
		Supervisor::restart();
		schedule check_interval { check_status() };
		break;

	case 2:
		# Second report: restart "grault" by name and poll it by name.
		Supervisor::restart("grault");
		schedule check_interval { check_status("grault") };
		break;

	default:
		# Third report: nothing left to check.
		terminate();
		break;
	}
	}
event zeek_init()
	{
	# Supervised nodes have nothing to set up in this test.
	if ( ! Supervisor::is_supervisor() )
		return;

	local node_cfg = Supervisor::NodeConfig($name="grault");
	local err = Supervisor::create(node_cfg);

	if ( err != "" )
		print "failed to create node", err;

	# Reuse the same config record, renamed, for a second node.
	node_cfg$name = "qux";
	err = Supervisor::create(node_cfg);

	if ( err != "" )
		print "failed to create node", err;

	# Start the status polling immediately.
	event check_status();
	}

View file

@ -0,0 +1,34 @@
# @TEST-EXEC: btest-bg-run zeek zeek -j -b %INPUT
# @TEST-EXEC: btest-bg-wait 20
# @TEST-EXEC: btest-diff zeek/.stdout
# So the supervised node doesn't terminate right away.
redef exit_only_after_terminate=T;
event check_status()
	{
	local sup_status = Supervisor::status();
	local node_status = sup_status$nodes["grault"];

	if ( node_status?$pid )
		{
		# The node has a PID now: report it once and end the test.
		print "got supervised node status", node_status$node$name;
		terminate();
		return;
		}

	# No PID yet; look again shortly.
	schedule 0.25sec { check_status() };
	}
event zeek_init()
	{
	# Supervised nodes have nothing to set up in this test.
	if ( ! Supervisor::is_supervisor() )
		return;

	local node_cfg = Supervisor::NodeConfig($name="grault");
	local err = Supervisor::create(node_cfg);

	if ( err != "" )
		print "failed to create node", err;

	# Start the status polling immediately.
	event check_status();
	}