f338ef
From bf24623765817ede84ea47f3265f5e6c2ae17ee7 Mon Sep 17 00:00:00 2001
f338ef
From: Mohit Agrawal <moagrawal@redhat.com>
f338ef
Date: Tue, 16 Jul 2019 20:36:57 +0530
f338ef
Subject: [PATCH 279/284] posix: In brick_mux brick is crashed while start/stop
f338ef
 volume in loop
f338ef
f338ef
Problem: In brick_mux environment sometime brick is crashed while
f338ef
         volume stop/start in a loop.Brick is crashed in janitor task
f338ef
         at the time of accessing priv.If posix priv is cleaned up before
f338ef
         call janitor task then janitor task is crashed.
f338ef
f338ef
Solution: To avoid the crash in brick_mux environment introduce a new
f338ef
          flag janitor_task_stop in posix_private and before send CHILD_DOWN event
f338ef
          wait for update the flag by janitor_task_done
f338ef
f338ef
> Change-Id: Id9fa5d183a463b2b682774ab5cb9868357d139a4
f338ef
> fixes: bz#1730409
f338ef
> Signed-off-by: Mohit Agrawal <moagrawal@redhat.com>
f338ef
> (Cherry picked from commit f138d3fa2237e7fa940ecf17153fd700350c4138)
f338ef
> (Reviewed on upstream link https://review.gluster.org/#/c/glusterfs/+/23060/)
f338ef
f338ef
Change-Id: Id9fa5d183a463b2b682774ab5cb9868357d139a4
f338ef
fixex: bz#1729971
f338ef
Signed-off-by: Mohit Agrawal <moagrawal@redhat.com>
f338ef
Reviewed-on: https://code.engineering.redhat.com/gerrit/178934
f338ef
Tested-by: Mohit Agrawal <moagrawa@redhat.com>
f338ef
Tested-by: RHGS Build Bot <nigelb@redhat.com>
f338ef
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
f338ef
---
f338ef
 libglusterfs/src/glusterfs/xlator.h        |  3 +++
f338ef
 xlators/mgmt/glusterd/src/glusterd-utils.c |  5 ++--
f338ef
 xlators/protocol/server/src/server.c       |  6 ++++-
f338ef
 xlators/storage/posix/src/posix-common.c   | 40 +++++++++++++++++++++++++++++-
f338ef
 xlators/storage/posix/src/posix-helpers.c  | 16 ++++++++++++
f338ef
 xlators/storage/posix/src/posix.h          |  3 +++
f338ef
 6 files changed, 69 insertions(+), 4 deletions(-)
f338ef
f338ef
diff --git a/libglusterfs/src/glusterfs/xlator.h b/libglusterfs/src/glusterfs/xlator.h
f338ef
index b78daad..da551e9 100644
f338ef
--- a/libglusterfs/src/glusterfs/xlator.h
f338ef
+++ b/libglusterfs/src/glusterfs/xlator.h
f338ef
@@ -861,6 +861,9 @@ struct _xlator {
f338ef
 
f338ef
     /* Flag to notify got CHILD_DOWN event for detach brick */
f338ef
     uint32_t notify_down;
f338ef
+
f338ef
+    /* Flag to avoid throw duplicate PARENT_DOWN event */
f338ef
+    uint32_t parent_down;
f338ef
 };
f338ef
 
f338ef
 /* This would be the only structure which needs to be exported by
f338ef
diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c
f338ef
index 2aa975b..812c698 100644
f338ef
--- a/xlators/mgmt/glusterd/src/glusterd-utils.c
f338ef
+++ b/xlators/mgmt/glusterd/src/glusterd-utils.c
f338ef
@@ -4082,8 +4082,9 @@ out:
f338ef
     if (msg[0]) {
f338ef
         gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_BRICK_IMPORT_FAIL, "%s",
f338ef
                msg);
f338ef
-        gf_event(EVENT_IMPORT_BRICK_FAILED, "peer=%s;brick=%s",
f338ef
-                 new_brickinfo->hostname, new_brickinfo->path);
f338ef
+        if (new_brickinfo)
f338ef
+            gf_event(EVENT_IMPORT_BRICK_FAILED, "peer=%s;brick=%s",
f338ef
+                     new_brickinfo->hostname, new_brickinfo->path);
f338ef
     }
f338ef
     gf_msg_debug("glusterd", 0, "Returning with %d", ret);
f338ef
     return ret;
f338ef
diff --git a/xlators/protocol/server/src/server.c b/xlators/protocol/server/src/server.c
f338ef
index 6ae63ba..a5f09fe 100644
f338ef
--- a/xlators/protocol/server/src/server.c
f338ef
+++ b/xlators/protocol/server/src/server.c
f338ef
@@ -580,6 +580,7 @@ server_graph_janitor_threads(void *data)
f338ef
     gf_boolean_t victim_found = _gf_false;
f338ef
     xlator_list_t **trav_p = NULL;
f338ef
     xlator_t *top = NULL;
f338ef
+    uint32_t parent_down = 0;
f338ef
 
f338ef
     GF_ASSERT(data);
f338ef
 
f338ef
@@ -598,7 +599,10 @@ server_graph_janitor_threads(void *data)
f338ef
         victim = (*trav_p)->xlator;
f338ef
         if (victim->cleanup_starting &&
f338ef
             strcmp(victim->name, victim_name) == 0) {
f338ef
-            victim_found = _gf_true;
f338ef
+            parent_down = victim->parent_down;
f338ef
+            victim->parent_down = 1;
f338ef
+            if (!parent_down)
f338ef
+                victim_found = _gf_true;
f338ef
             break;
f338ef
         }
f338ef
     }
f338ef
diff --git a/xlators/storage/posix/src/posix-common.c b/xlators/storage/posix/src/posix-common.c
f338ef
index d738692..69857d9 100644
f338ef
--- a/xlators/storage/posix/src/posix-common.c
f338ef
+++ b/xlators/storage/posix/src/posix-common.c
f338ef
@@ -146,10 +146,15 @@ int32_t
f338ef
 posix_notify(xlator_t *this, int32_t event, void *data, ...)
f338ef
 {
f338ef
     xlator_t *victim = data;
f338ef
+    struct posix_private *priv = this->private;
f338ef
+    int ret = 0;
f338ef
+    struct timespec sleep_till = {
f338ef
+        0,
f338ef
+    };
f338ef
 
f338ef
     switch (event) {
f338ef
         case GF_EVENT_PARENT_UP: {
f338ef
-            /* Tell the parent that posix xlator is up */
f338ef
+            /* the parent that posix xlator is up */
f338ef
             default_notify(this, GF_EVENT_CHILD_UP, data);
f338ef
         } break;
f338ef
 
f338ef
@@ -158,6 +163,31 @@ posix_notify(xlator_t *this, int32_t event, void *data, ...)
f338ef
                 break;
f338ef
             gf_log(this->name, GF_LOG_INFO, "Sending CHILD_DOWN for brick %s",
f338ef
                    victim->name);
f338ef
+
f338ef
+            if (priv->janitor) {
f338ef
+                pthread_mutex_lock(&priv->janitor_mutex);
f338ef
+                {
f338ef
+                    priv->janitor_task_stop = _gf_true;
f338ef
+                    ret = gf_tw_del_timer(this->ctx->tw->timer_wheel,
f338ef
+                                          priv->janitor);
f338ef
+                    if (!ret) {
f338ef
+                        clock_gettime(CLOCK_REALTIME, &sleep_till);
f338ef
+                        sleep_till.tv_sec += 1;
f338ef
+                        /* Wait to set janitor_task flag to _gf_false by
f338ef
+                         * janitor_task_done */
f338ef
+                        while (priv->janitor_task_stop) {
f338ef
+                            (void)pthread_cond_timedwait(&priv->janitor_cond,
f338ef
+                                                         &priv->janitor_mutex,
f338ef
+                                                         &sleep_till);
f338ef
+                            clock_gettime(CLOCK_REALTIME, &sleep_till);
f338ef
+                            sleep_till.tv_sec += 1;
f338ef
+                        }
f338ef
+                    }
f338ef
+                }
f338ef
+                pthread_mutex_unlock(&priv->janitor_mutex);
f338ef
+                GF_FREE(priv->janitor);
f338ef
+            }
f338ef
+            priv->janitor = NULL;
f338ef
             default_notify(this->parents->xlator, GF_EVENT_CHILD_DOWN, data);
f338ef
         } break;
f338ef
         default:
f338ef
@@ -1008,6 +1038,8 @@ posix_init(xlator_t *this)
f338ef
 
f338ef
     pthread_mutex_init(&_private->fsync_mutex, NULL);
f338ef
     pthread_cond_init(&_private->fsync_cond, NULL);
f338ef
+    pthread_mutex_init(&_private->janitor_mutex, NULL);
f338ef
+    pthread_cond_init(&_private->janitor_cond, NULL);
f338ef
     INIT_LIST_HEAD(&_private->fsyncs);
f338ef
     ret = posix_spawn_ctx_janitor_thread(this);
f338ef
     if (ret)
f338ef
@@ -1128,6 +1160,7 @@ posix_fini(xlator_t *this)
f338ef
         (void)gf_thread_cleanup_xint(priv->disk_space_check);
f338ef
         priv->disk_space_check = 0;
f338ef
     }
f338ef
+
f338ef
     if (priv->janitor) {
f338ef
         /*TODO: Make sure the synctask is also complete */
f338ef
         ret = gf_tw_del_timer(this->ctx->tw->timer_wheel, priv->janitor);
f338ef
@@ -1135,8 +1168,10 @@ posix_fini(xlator_t *this)
f338ef
             gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_TIMER_DELETE_FAILED,
f338ef
                    "Failed to delete janitor timer");
f338ef
         }
f338ef
+        GF_FREE(priv->janitor);
f338ef
         priv->janitor = NULL;
f338ef
     }
f338ef
+
f338ef
     if (priv->fsyncer) {
f338ef
         (void)gf_thread_cleanup_xint(priv->fsyncer);
f338ef
         priv->fsyncer = 0;
f338ef
@@ -1148,6 +1183,9 @@ posix_fini(xlator_t *this)
f338ef
     GF_FREE(priv->base_path);
f338ef
     LOCK_DESTROY(&priv->lock);
f338ef
     pthread_mutex_destroy(&priv->fsync_mutex);
f338ef
+    pthread_cond_destroy(&priv->fsync_cond);
f338ef
+    pthread_mutex_destroy(&priv->janitor_mutex);
f338ef
+    pthread_cond_destroy(&priv->janitor_cond);
f338ef
     GF_FREE(priv->hostname);
f338ef
     GF_FREE(priv->trash_path);
f338ef
     GF_FREE(priv);
f338ef
diff --git a/xlators/storage/posix/src/posix-helpers.c b/xlators/storage/posix/src/posix-helpers.c
f338ef
index 07169b5..ef5bfd5 100644
f338ef
--- a/xlators/storage/posix/src/posix-helpers.c
f338ef
+++ b/xlators/storage/posix/src/posix-helpers.c
f338ef
@@ -1432,12 +1432,24 @@ posix_janitor_task_done(int ret, call_frame_t *frame, void *data)
f338ef
     this = data;
f338ef
     priv = this->private;
f338ef
 
f338ef
+    pthread_mutex_lock(&priv->janitor_mutex);
f338ef
+    {
f338ef
+        if (priv->janitor_task_stop) {
f338ef
+            priv->janitor_task_stop = _gf_false;
f338ef
+            pthread_cond_signal(&priv->janitor_cond);
f338ef
+            pthread_mutex_unlock(&priv->janitor_mutex);
f338ef
+            goto out;
f338ef
+        }
f338ef
+    }
f338ef
+    pthread_mutex_unlock(&priv->janitor_mutex);
f338ef
+
f338ef
     LOCK(&priv->lock);
f338ef
     {
f338ef
         __posix_janitor_timer_start(this);
f338ef
     }
f338ef
     UNLOCK(&priv->lock);
f338ef
 
f338ef
+out:
f338ef
     return 0;
f338ef
 }
f338ef
 
f338ef
@@ -1456,6 +1468,9 @@ posix_janitor_task(void *data)
f338ef
     old_this = THIS;
f338ef
     THIS = this;
f338ef
 
f338ef
+    if (!priv)
f338ef
+        goto out;
f338ef
+
f338ef
     time(&now;;
f338ef
     if ((now - priv->last_landfill_check) > priv->janitor_sleep_duration) {
f338ef
         if (priv->disable_landfill_purge) {
f338ef
@@ -1475,6 +1490,7 @@ posix_janitor_task(void *data)
f338ef
 
f338ef
     THIS = old_this;
f338ef
 
f338ef
+out:
f338ef
     return 0;
f338ef
 }
f338ef
 
f338ef
diff --git a/xlators/storage/posix/src/posix.h b/xlators/storage/posix/src/posix.h
f338ef
index b0935a7..64288a7 100644
f338ef
--- a/xlators/storage/posix/src/posix.h
f338ef
+++ b/xlators/storage/posix/src/posix.h
f338ef
@@ -203,6 +203,8 @@ struct posix_private {
f338ef
     struct list_head fsyncs;
f338ef
     pthread_mutex_t fsync_mutex;
f338ef
     pthread_cond_t fsync_cond;
f338ef
+    pthread_mutex_t janitor_mutex;
f338ef
+    pthread_cond_t janitor_cond;
f338ef
     int fsync_queue_count;
f338ef
 
f338ef
     enum {
f338ef
@@ -257,6 +259,7 @@ struct posix_private {
f338ef
 
f338ef
     gf_boolean_t fips_mode_rchecksum;
f338ef
     gf_boolean_t ctime;
f338ef
+    gf_boolean_t janitor_task_stop;
f338ef
 };
f338ef
 
f338ef
 typedef struct {
f338ef
-- 
f338ef
1.8.3.1
f338ef