Merge remote-tracking branch 'origin/topic/bernhard/thread-cleanup'

* origin/topic/bernhard/thread-cleanup:
  and just to be really sure - always make threads go through OnWaitForStop
  hopefully finally fix last interesting race-condition
  it is apparently getting a bit late for changes at important code...
  spoke too soon (forgot to comment in line again).
  Change thread shutdown again to also work with input framework.
  Changing semantics of thread stop methods.
  Support for cleaning up threads that have terminated.
This commit is contained in:
Robin Sommer 2013-05-15 17:16:41 -07:00
commit 639a6410c6
18 changed files with 218 additions and 95 deletions

View file

@ -117,20 +117,7 @@ void BasicThread::Start()
OnStart();
}
void BasicThread::PrepareStop()
{
if ( ! started )
return;
if ( terminating )
return;
DBG_LOG(DBG_THREADING, "Preparing thread %s to terminate ...", name);
OnPrepareStop();
}
void BasicThread::Stop()
void BasicThread::SignalStop()
{
if ( ! started )
return;
@ -140,7 +127,17 @@ void BasicThread::Stop()
DBG_LOG(DBG_THREADING, "Signaling thread %s to terminate ...", name);
OnStop();
OnSignalStop();
}
// Waits for the thread to stop. Does nothing if the thread was never
// started. Otherwise logs the wait, runs the OnWaitForStop() hook (which
// derived classes implement and which is expected to return only once the
// thread has finished processing), and then flags the thread as terminating.
void BasicThread::WaitForStop()
{
if ( ! started )
return;
DBG_LOG(DBG_THREADING, "Waiting for thread %s to terminate and process last queue items...", name);
OnWaitForStop();
// Set only after the hook has returned, i.e. after the wait is over.
terminating = true;
}
@ -150,11 +147,12 @@ void BasicThread::Join()
if ( ! started )
return;
if ( ! pthread )
return;
assert(terminating);
DBG_LOG(DBG_THREADING, "Joining thread %s ...", name);
if ( pthread && pthread_join(pthread, 0) != 0 )
if ( pthread_join(pthread, 0) != 0 )
reporter->FatalError("Failure joining thread %s", name);
DBG_LOG(DBG_THREADING, "Joined with thread %s", name);

View file

@ -71,32 +71,33 @@ public:
void Start();
/**
* Signals the thread to prepare for stopping. This must be called
* before Stop() and allows the thread to trigger shutting down
* without yet blocking for doing so.
* Signals the thread to prepare for stopping, but doesn't block to
* wait for that to happen. Use WaitForStop() for that.
*
* The method lets Terminating() now return true, it does however not
* force the thread to terminate. It's up to the Run() method to
* query Terminating() and exit eventually.
*
* Calling this method has no effect if Start() hasn't been executed
* yet.
*
* Only Bro's main thread must call this method.
*/
void PrepareStop();
void SignalStop();
/**
* Signals the thread to stop. The method lets Terminating() now
* return true. It does however not force the thread to terminate.
* It's up to the Run() method to to query Terminating() and exit
* eventually.
* Waits until a thread has stopped after receiving SignalStop().
*
* Calling this method has no effect if Start() hasn't been executed
* yet.
* yet. If this is executed without calling SignalStop() first,
* results are undefined.
*
* Only Bro's main thread must call this method.
*/
void Stop();
void WaitForStop();
/**
* Returns true if Stop() has been called.
* Returns true if WaitForStop() has been called and finished.
*
* This method is safe to call from any thread.
*/
@ -145,18 +146,19 @@ protected:
virtual void OnStart() {}
/**
* Executed with PrepareStop() (and before OnStop()). This is a hook
* into preparing the thread for stopping. It will be called from
* Bro's main thread before the thread has been signaled to stop.
* Executed with SignalStop(). This is a hook into preparing the
* thread for stopping. It will be called from Bro's main thread
* before the thread has been signaled to stop.
*/
virtual void OnPrepareStop() {}
virtual void OnSignalStop() {}
/**
* Executed with Stop() (and after OnPrepareStop()). This is a hook
* into stopping the thread. It will be called from Bro's main thread
* after the thread has been signaled to stop.
* Executed with WaitForStop(). This is a hook into waiting for the
* thread to stop. It must be overridden by derived classes and only
* return once the thread has indeed finished processing. The method
* will be called from Bro's main thread.
*/
virtual void OnStop() {}
virtual void OnWaitForStop() = 0;
/**
* Executed with Kill(). This is a hook into killing the thread.

View file

@ -32,10 +32,10 @@ void Manager::Terminate()
// Signal all to stop.
for ( all_thread_list::iterator i = all_threads.begin(); i != all_threads.end(); i++ )
(*i)->PrepareStop();
(*i)->SignalStop();
for ( all_thread_list::iterator i = all_threads.begin(); i != all_threads.end(); i++ )
(*i)->Stop();
(*i)->WaitForStop();
// Then join them all.
for ( all_thread_list::iterator i = all_threads.begin(); i != all_threads.end(); i++ )
@ -122,15 +122,10 @@ void Manager::Process()
if ( do_beat )
t->Heartbeat();
while ( t->HasOut() && ! t->Killed() )
while ( t->HasOut() )
{
Message* msg = t->RetrieveOut();
if ( ! msg )
{
assert(t->Killed());
break;
}
assert(msg);
if ( msg->Process() )
{
@ -141,13 +136,40 @@ void Manager::Process()
else
{
reporter->Error("%s failed, terminating thread", msg->Name());
t->Stop();
t->SignalStop();
}
delete msg;
}
}
all_thread_list to_delete;
for ( all_thread_list::iterator i = all_threads.begin(); i != all_threads.end(); i++ )
{
BasicThread* t = *i;
if ( ! t->Killed() )
continue;
to_delete.push_back(t);
}
for ( all_thread_list::iterator i = to_delete.begin(); i != to_delete.end(); i++ )
{
BasicThread* t = *i;
all_threads.remove(t);
MsgThread* mt = dynamic_cast<MsgThread *>(t);
if ( mt )
msg_threads.remove(mt);
t->Join();
delete t;
}
// fprintf(stderr, "P %.6f %.6f do_beat=%d did_process=%d next_next=%.6f\n", network_time, timer_mgr->Time(), do_beat, (int)did_process, next_beat);
}

View file

@ -68,12 +68,6 @@ public:
*/
int NumThreads() const { return all_threads.size(); }
/** Manually triggers processing of any thread input. This can be useful
* if the main thread is waiting for a specific message from a child.
* Usually, though, one should avoid using it.
*/
void ForceProcessing() { Process(); }
/**
* Signals a specific thread to terminate immediately.
*/

View file

@ -30,6 +30,20 @@ private:
double network_time;
};
// Signals main thread that operations shut down. An instance is queued to
// the main thread by MsgThread::Finished(); processing it records on the
// owning MsgThread that the finish notification has arrived.
class FinishedMessage : public OutputMessage<MsgThread>
{
public:
// thread: the MsgThread this message refers to; Process() later reaches it
// through Object().
FinishedMessage(MsgThread* thread)
: OutputMessage<MsgThread>("FinishedMessage", thread)
{ }
// Sets the thread's main_finished flag and returns true.
virtual bool Process() {
Object()->main_finished = true;
return true;
}
};
/// Sends a heartbeat to the child thread.
class HeartbeatMessage : public InputMessage<MsgThread>
{
@ -153,7 +167,8 @@ bool ReporterMessage::Process()
MsgThread::MsgThread() : BasicThread(), queue_in(this, 0), queue_out(0, this)
{
cnt_sent_in = cnt_sent_out = 0;
finished = false;
main_finished = false;
child_finished = false;
failed = false;
thread_mgr->AddMsgThread(this);
}
@ -161,16 +176,16 @@ MsgThread::MsgThread() : BasicThread(), queue_in(this, 0), queue_out(0, this)
// Set by Bro's main signal handler.
extern int signal_val;
void MsgThread::OnPrepareStop()
void MsgThread::OnSignalStop()
{
if ( finished || Killed() )
if ( main_finished || Killed() )
return;
// Signal thread to terminate and wait until it has acknowledged.
// Signal thread to terminate.
SendIn(new FinishMessage(this, network_time), true);
}
void MsgThread::OnStop()
void MsgThread::OnWaitForStop()
{
int signal_count = 0;
int old_signal_val = signal_val;
@ -180,7 +195,7 @@ void MsgThread::OnStop()
uint64_t last_size = 0;
uint64_t cur_size = 0;
while ( ! (finished || Killed() ) )
while ( ! main_finished )
{
// Terminate if we get another kill signal.
if ( signal_val == SIGTERM || signal_val == SIGINT )
@ -205,9 +220,22 @@ void MsgThread::OnStop()
signal_val = 0;
}
queue_in.WakeUp();
if ( ! Killed() )
queue_in.WakeUp();
usleep(1000);
while ( HasOut() )
{
Message* msg = RetrieveOut();
assert ( msg );
if ( ! msg->Process() )
reporter->Error("%s failed during thread termination", msg->Name());
delete msg;
}
if ( ! Killed() )
usleep(1000);
}
signal_val = old_signal_val;
@ -237,9 +265,8 @@ void MsgThread::HeartbeatInChild()
void MsgThread::Finished()
{
// This is thread-safe "enough", we're the only one ever writing
// there.
finished = true;
child_finished = true;
SendOut(new FinishedMessage(this));
}
void MsgThread::Info(const char* msg)
@ -344,7 +371,7 @@ BasicInputMessage* MsgThread::RetrieveIn()
void MsgThread::Run()
{
while ( ! (finished || Killed() ) )
while ( ! (child_finished || Killed() ) )
{
BasicInputMessage* msg = RetrieveIn();
@ -368,10 +395,10 @@ void MsgThread::Run()
}
}
// In case we haven't send the finish method yet, do it now. Reading
// In case we haven't sent the finish method yet, do it now. Reading
// global network_time here should be fine, it isn't changing
// anymore.
if ( ! finished && ! Killed() )
if ( ! child_finished && ! Killed() )
{
OnFinish(network_time);
Finished();

View file

@ -228,8 +228,8 @@ protected:
*
*/
virtual void Run();
virtual void OnStop();
virtual void OnPrepareStop();
virtual void OnWaitForStop();
virtual void OnSignalStop();
virtual void OnKill();
private:
@ -289,7 +289,8 @@ private:
*/
bool MightHaveOut() { return queue_out.MaybeReady(); }
/** Flags that the child process has finished processing. Called from child.
/** Sends a message to the main thread signaling that the child process
* has finished processing. Called from child.
*/
void Finished();
@ -299,7 +300,8 @@ private:
uint64_t cnt_sent_in; // Counts message sent to child.
uint64_t cnt_sent_out; // Counts message sent by child.
bool finished; // Set to true by Finished message.
bool main_finished; // Main thread is finished, meaning child_finished propagated back through message queue.
bool child_finished; // Child thread is finished.
bool failed; // Set to true when a command failed.
};

View file

@ -155,14 +155,11 @@ inline Queue<T>::~Queue()
template<typename T>
inline T Queue<T>::Get()
{
if ( (reader && reader->Killed()) || (writer && writer->Killed()) )
return 0;
safe_lock(&mutex[read_ptr]);
int old_read_ptr = read_ptr;
if ( messages[read_ptr].empty() )
if ( messages[read_ptr].empty() && ! ((reader && reader->Killed()) || (writer && writer->Killed())) )
{
struct timespec ts;
ts.tv_sec = time(0) + 5;
@ -173,6 +170,12 @@ inline T Queue<T>::Get()
return 0;
}
else if ( messages[read_ptr].empty() )
{
safe_unlock(&mutex[read_ptr]);
return 0;
}
T data = messages[read_ptr].front();
messages[read_ptr].pop();