Blob Blame History Raw
From 162ac9b69e9df8708cf6c3ac70704e05264429c4 Mon Sep 17 00:00:00 2001
From: Pranith Kumar K <pkarampu@redhat.com>
Date: Sun, 9 Oct 2016 21:36:40 +0530
Subject: [PATCH 138/141] performance/io-threads: Exit all threads on PARENT_DOWN

Problem:
When glfs_fini() is called on a volume where client.io-threads is enabled,
fini() will free up iothread xl's private structure but there would be some
threads that are sleeping which would wakeup after the timedwait completes
leading to accessing already free'd memory.

Fix:
As part of parent-down, exit all sleeping threads.

Please note that the upstream patch differs from this a little bit,
because least-prio-throttling feature is removed from master, 3.9

 >BUG: 1381830
 >Change-Id: I0bb8d90241112c355fb22ee3fbfd7307f475b339
 >Signed-off-by: Pranith Kumar K <pkarampu@redhat.com>
 >Reviewed-on: http://review.gluster.org/15620
 >Smoke: Gluster Build System <jenkins@build.gluster.org>
 >CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
 >NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
 >Reviewed-by: Raghavendra G <rgowdapp@redhat.com>
 >(cherry picked from commit d7a5ca16911caca03cec1112d4be56a9cda2ee30)

BUG: 1380619
Change-Id: I450e5db84c83fae3237aaa7915c08cd3ee9bde2c
Signed-off-by: Pranith Kumar K <pkarampu@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/87972
Reviewed-by: Raghavendra Gowdappa <rgowdapp@redhat.com>
Tested-by: Raghavendra Gowdappa <rgowdapp@redhat.com>
---
 xlators/performance/io-threads/src/io-threads.c |   71 +++++++++++++++-------
 xlators/performance/io-threads/src/io-threads.h |    1 +
 2 files changed, 49 insertions(+), 23 deletions(-)

diff --git a/xlators/performance/io-threads/src/io-threads.c b/xlators/performance/io-threads/src/io-threads.c
index c81a97d..394be8a 100644
--- a/xlators/performance/io-threads/src/io-threads.c
+++ b/xlators/performance/io-threads/src/io-threads.c
@@ -153,9 +153,8 @@ iot_worker (void *data)
         struct timespec   sleep_till = {0, };
         int               ret = 0;
         int               pri = -1;
-        char              timeout = 0;
-        char              bye = 0;
 	struct timespec	  sleep = {0,};
+        gf_boolean_t      bye = _gf_false;
 
         conf = data;
         this = conf->this;
@@ -171,6 +170,11 @@ iot_worker (void *data)
                                 pri = -1;
                         }
                         while (conf->queue_size == 0) {
+                                if (conf->down) {
+                                        bye = _gf_true;/*Avoid sleep*/
+                                        break;
+                                }
+
                                 conf->sleep_count++;
 
                                 ret = pthread_cond_timedwait (&conf->cond,
@@ -178,48 +182,48 @@ iot_worker (void *data)
                                                               &sleep_till);
                                 conf->sleep_count--;
 
-                                if (ret == ETIMEDOUT) {
-                                        timeout = 1;
+                                if (conf->down || ret == ETIMEDOUT) {
+                                        bye = _gf_true;
                                         break;
                                 }
                         }
 
-                        if (timeout) {
-                                if (conf->curr_count > IOT_MIN_THREADS) {
+                        if (bye) {
+                                if (conf->down ||
+                                    conf->curr_count > IOT_MIN_THREADS) {
                                         conf->curr_count--;
-                                        bye = 1;
+                                        if (conf->curr_count == 0)
+                                           pthread_cond_broadcast (&conf->cond);
                                         gf_msg_debug (conf->this->name, 0,
-                                                      "timeout, terminated. conf->curr_count=%d",
+                                                      "terminated. "
+                                                      "conf->curr_count=%d",
                                                       conf->curr_count);
                                 } else {
-                                        timeout = 0;
+                                        bye = _gf_false;
                                 }
                         }
 
-                        stub = __iot_dequeue (conf, &pri, &sleep);
-			if (!stub && (sleep.tv_sec || sleep.tv_nsec)) {
-				pthread_cond_timedwait(&conf->cond,
-						       &conf->mutex, &sleep);
-				pthread_mutex_unlock(&conf->mutex);
-				continue;
-			}
+                        if (!bye) {
+                                stub = __iot_dequeue (conf, &pri, &sleep);
+                                if (!stub && (sleep.tv_sec || sleep.tv_nsec)) {
+                                        pthread_cond_timedwait(&conf->cond,
+                                                               &conf->mutex,
+                                                               &sleep);
+                                        pthread_mutex_unlock(&conf->mutex);
+                                        continue;
+                                }
+                        }
                 }
                 pthread_mutex_unlock (&conf->mutex);
 
                 if (stub) /* guard against spurious wakeups */
                         call_resume (stub);
+                stub = NULL;
 
                 if (bye)
                         break;
         }
 
-        if (pri != -1) {
-                pthread_mutex_lock (&conf->mutex);
-                {
-                        conf->ac_iot_count[pri]--;
-                }
-                pthread_mutex_unlock (&conf->mutex);
-        }
         return NULL;
 }
 
@@ -1021,6 +1025,27 @@ out:
 	return ret;
 }
 
+int
+notify (xlator_t *this, int32_t event, void *data, ...)
+{
+        iot_conf_t *conf = this->private;
+
+        if (GF_EVENT_PARENT_DOWN == event) {
+                pthread_mutex_lock (&conf->mutex);
+                {
+                        conf->down = _gf_true;
+                        /*Let all the threads know that xl is going down*/
+                        pthread_cond_broadcast (&conf->cond);
+                        while (conf->curr_count)/*Wait for threads to exit*/
+                                pthread_cond_wait (&conf->cond, &conf->mutex);
+                }
+                pthread_mutex_unlock (&conf->mutex);
+        }
+
+        default_notify (this, event, data);
+
+        return 0;
+}
 
 void
 fini (xlator_t *this)
diff --git a/xlators/performance/io-threads/src/io-threads.h b/xlators/performance/io-threads/src/io-threads.h
index d8eea2c..cb984f0 100644
--- a/xlators/performance/io-threads/src/io-threads.h
+++ b/xlators/performance/io-threads/src/io-threads.h
@@ -79,6 +79,7 @@ struct iot_conf {
         xlator_t            *this;
         size_t              stack_size;
 
+        gf_boolean_t         down; /*PARENT_DOWN event is notified*/
 	struct iot_least_throttle throttle;
 };
 
-- 
1.7.1