e3c68b
From bf24623765817ede84ea47f3265f5e6c2ae17ee7 Mon Sep 17 00:00:00 2001
e3c68b
From: Mohit Agrawal <moagrawal@redhat.com>
e3c68b
Date: Tue, 16 Jul 2019 20:36:57 +0530
e3c68b
Subject: [PATCH 279/284] posix: In brick_mux brick is crashed while start/stop
e3c68b
 volume in loop
e3c68b
e3c68b
Problem: In a brick_mux environment the brick sometimes crashes while
e3c68b
         a volume is stopped/started in a loop. The brick crashes in the
e3c68b
         janitor task when it accesses priv: if posix priv is cleaned up
e3c68b
         before the janitor task runs, the janitor task crashes.
e3c68b
e3c68b
Solution: To avoid the crash in a brick_mux environment, introduce a new
e3c68b
          flag janitor_task_stop in posix_private and, before sending the
e3c68b
          CHILD_DOWN event, wait for janitor_task_done to update the flag.
e3c68b
e3c68b
> Change-Id: Id9fa5d183a463b2b682774ab5cb9868357d139a4
e3c68b
> fixes: bz#1730409
e3c68b
> Signed-off-by: Mohit Agrawal <moagrawal@redhat.com>
e3c68b
> (Cherry picked from commit f138d3fa2237e7fa940ecf17153fd700350c4138)
e3c68b
> (Reviewed on upstream link https://review.gluster.org/#/c/glusterfs/+/23060/)
e3c68b
e3c68b
Change-Id: Id9fa5d183a463b2b682774ab5cb9868357d139a4
e3c68b
fixes: bz#1729971
e3c68b
Signed-off-by: Mohit Agrawal <moagrawal@redhat.com>
e3c68b
Reviewed-on: https://code.engineering.redhat.com/gerrit/178934
e3c68b
Tested-by: Mohit Agrawal <moagrawa@redhat.com>
e3c68b
Tested-by: RHGS Build Bot <nigelb@redhat.com>
e3c68b
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
e3c68b
---
e3c68b
 libglusterfs/src/glusterfs/xlator.h        |  3 +++
e3c68b
 xlators/mgmt/glusterd/src/glusterd-utils.c |  5 ++--
e3c68b
 xlators/protocol/server/src/server.c       |  6 ++++-
e3c68b
 xlators/storage/posix/src/posix-common.c   | 40 +++++++++++++++++++++++++++++-
e3c68b
 xlators/storage/posix/src/posix-helpers.c  | 16 ++++++++++++
e3c68b
 xlators/storage/posix/src/posix.h          |  3 +++
e3c68b
 6 files changed, 69 insertions(+), 4 deletions(-)
e3c68b
e3c68b
diff --git a/libglusterfs/src/glusterfs/xlator.h b/libglusterfs/src/glusterfs/xlator.h
e3c68b
index b78daad..da551e9 100644
e3c68b
--- a/libglusterfs/src/glusterfs/xlator.h
e3c68b
+++ b/libglusterfs/src/glusterfs/xlator.h
e3c68b
@@ -861,6 +861,9 @@ struct _xlator {
e3c68b
 
e3c68b
     /* Flag to notify got CHILD_DOWN event for detach brick */
e3c68b
     uint32_t notify_down;
e3c68b
+
e3c68b
+    /* Flag to avoid throwing duplicate PARENT_DOWN events */
e3c68b
+    uint32_t parent_down;
e3c68b
 };
e3c68b
 
e3c68b
 /* This would be the only structure which needs to be exported by
e3c68b
diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c
e3c68b
index 2aa975b..812c698 100644
e3c68b
--- a/xlators/mgmt/glusterd/src/glusterd-utils.c
e3c68b
+++ b/xlators/mgmt/glusterd/src/glusterd-utils.c
e3c68b
@@ -4082,8 +4082,9 @@ out:
e3c68b
     if (msg[0]) {
e3c68b
         gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_BRICK_IMPORT_FAIL, "%s",
e3c68b
                msg);
e3c68b
-        gf_event(EVENT_IMPORT_BRICK_FAILED, "peer=%s;brick=%s",
e3c68b
-                 new_brickinfo->hostname, new_brickinfo->path);
e3c68b
+        if (new_brickinfo)
e3c68b
+            gf_event(EVENT_IMPORT_BRICK_FAILED, "peer=%s;brick=%s",
e3c68b
+                     new_brickinfo->hostname, new_brickinfo->path);
e3c68b
     }
e3c68b
     gf_msg_debug("glusterd", 0, "Returning with %d", ret);
e3c68b
     return ret;
e3c68b
diff --git a/xlators/protocol/server/src/server.c b/xlators/protocol/server/src/server.c
e3c68b
index 6ae63ba..a5f09fe 100644
e3c68b
--- a/xlators/protocol/server/src/server.c
e3c68b
+++ b/xlators/protocol/server/src/server.c
e3c68b
@@ -580,6 +580,7 @@ server_graph_janitor_threads(void *data)
e3c68b
     gf_boolean_t victim_found = _gf_false;
e3c68b
     xlator_list_t **trav_p = NULL;
e3c68b
     xlator_t *top = NULL;
e3c68b
+    uint32_t parent_down = 0;
e3c68b
 
e3c68b
     GF_ASSERT(data);
e3c68b
 
e3c68b
@@ -598,7 +599,10 @@ server_graph_janitor_threads(void *data)
e3c68b
         victim = (*trav_p)->xlator;
e3c68b
         if (victim->cleanup_starting &&
e3c68b
             strcmp(victim->name, victim_name) == 0) {
e3c68b
-            victim_found = _gf_true;
e3c68b
+            parent_down = victim->parent_down;
e3c68b
+            victim->parent_down = 1;
e3c68b
+            if (!parent_down)
e3c68b
+                victim_found = _gf_true;
e3c68b
             break;
e3c68b
         }
e3c68b
     }
e3c68b
diff --git a/xlators/storage/posix/src/posix-common.c b/xlators/storage/posix/src/posix-common.c
e3c68b
index d738692..69857d9 100644
e3c68b
--- a/xlators/storage/posix/src/posix-common.c
e3c68b
+++ b/xlators/storage/posix/src/posix-common.c
e3c68b
@@ -146,10 +146,15 @@ int32_t
e3c68b
 posix_notify(xlator_t *this, int32_t event, void *data, ...)
e3c68b
 {
e3c68b
     xlator_t *victim = data;
e3c68b
+    struct posix_private *priv = this->private;
e3c68b
+    int ret = 0;
e3c68b
+    struct timespec sleep_till = {
e3c68b
+        0,
e3c68b
+    };
e3c68b
 
e3c68b
     switch (event) {
e3c68b
         case GF_EVENT_PARENT_UP: {
e3c68b
-            /* Tell the parent that posix xlator is up */
e3c68b
+            /* Tell the parent that posix xlator is up */
e3c68b
             default_notify(this, GF_EVENT_CHILD_UP, data);
e3c68b
         } break;
e3c68b
 
e3c68b
@@ -158,6 +163,31 @@ posix_notify(xlator_t *this, int32_t event, void *data, ...)
e3c68b
                 break;
e3c68b
             gf_log(this->name, GF_LOG_INFO, "Sending CHILD_DOWN for brick %s",
e3c68b
                    victim->name);
e3c68b
+
e3c68b
+            if (priv->janitor) {
e3c68b
+                pthread_mutex_lock(&priv->janitor_mutex);
e3c68b
+                {
e3c68b
+                    priv->janitor_task_stop = _gf_true;
e3c68b
+                    ret = gf_tw_del_timer(this->ctx->tw->timer_wheel,
e3c68b
+                                          priv->janitor);
e3c68b
+                    if (!ret) {
e3c68b
+                        clock_gettime(CLOCK_REALTIME, &sleep_till);
e3c68b
+                        sleep_till.tv_sec += 1;
e3c68b
+                        /* Wait for posix_janitor_task_done to reset
e3c68b
+                         * janitor_task_stop to _gf_false */
e3c68b
+                        while (priv->janitor_task_stop) {
e3c68b
+                            (void)pthread_cond_timedwait(&priv->janitor_cond,
e3c68b
+                                                         &priv->janitor_mutex,
e3c68b
+                                                         &sleep_till);
e3c68b
+                            clock_gettime(CLOCK_REALTIME, &sleep_till);
e3c68b
+                            sleep_till.tv_sec += 1;
e3c68b
+                        }
e3c68b
+                    }
e3c68b
+                }
e3c68b
+                pthread_mutex_unlock(&priv->janitor_mutex);
e3c68b
+                GF_FREE(priv->janitor);
e3c68b
+            }
e3c68b
+            priv->janitor = NULL;
e3c68b
             default_notify(this->parents->xlator, GF_EVENT_CHILD_DOWN, data);
e3c68b
         } break;
e3c68b
         default:
e3c68b
@@ -1008,6 +1038,8 @@ posix_init(xlator_t *this)
e3c68b
 
e3c68b
     pthread_mutex_init(&_private->fsync_mutex, NULL);
e3c68b
     pthread_cond_init(&_private->fsync_cond, NULL);
e3c68b
+    pthread_mutex_init(&_private->janitor_mutex, NULL);
e3c68b
+    pthread_cond_init(&_private->janitor_cond, NULL);
e3c68b
     INIT_LIST_HEAD(&_private->fsyncs);
e3c68b
     ret = posix_spawn_ctx_janitor_thread(this);
e3c68b
     if (ret)
e3c68b
@@ -1128,6 +1160,7 @@ posix_fini(xlator_t *this)
e3c68b
         (void)gf_thread_cleanup_xint(priv->disk_space_check);
e3c68b
         priv->disk_space_check = 0;
e3c68b
     }
e3c68b
+
e3c68b
     if (priv->janitor) {
e3c68b
         /*TODO: Make sure the synctask is also complete */
e3c68b
         ret = gf_tw_del_timer(this->ctx->tw->timer_wheel, priv->janitor);
e3c68b
@@ -1135,8 +1168,10 @@ posix_fini(xlator_t *this)
e3c68b
             gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_TIMER_DELETE_FAILED,
e3c68b
                    "Failed to delete janitor timer");
e3c68b
         }
e3c68b
+        GF_FREE(priv->janitor);
e3c68b
         priv->janitor = NULL;
e3c68b
     }
e3c68b
+
e3c68b
     if (priv->fsyncer) {
e3c68b
         (void)gf_thread_cleanup_xint(priv->fsyncer);
e3c68b
         priv->fsyncer = 0;
e3c68b
@@ -1148,6 +1183,9 @@ posix_fini(xlator_t *this)
e3c68b
     GF_FREE(priv->base_path);
e3c68b
     LOCK_DESTROY(&priv->lock);
e3c68b
     pthread_mutex_destroy(&priv->fsync_mutex);
e3c68b
+    pthread_cond_destroy(&priv->fsync_cond);
e3c68b
+    pthread_mutex_destroy(&priv->janitor_mutex);
e3c68b
+    pthread_cond_destroy(&priv->janitor_cond);
e3c68b
     GF_FREE(priv->hostname);
e3c68b
     GF_FREE(priv->trash_path);
e3c68b
     GF_FREE(priv);
e3c68b
diff --git a/xlators/storage/posix/src/posix-helpers.c b/xlators/storage/posix/src/posix-helpers.c
e3c68b
index 07169b5..ef5bfd5 100644
e3c68b
--- a/xlators/storage/posix/src/posix-helpers.c
e3c68b
+++ b/xlators/storage/posix/src/posix-helpers.c
e3c68b
@@ -1432,12 +1432,24 @@ posix_janitor_task_done(int ret, call_frame_t *frame, void *data)
e3c68b
     this = data;
e3c68b
     priv = this->private;
e3c68b
 
e3c68b
+    pthread_mutex_lock(&priv->janitor_mutex);
e3c68b
+    {
e3c68b
+        if (priv->janitor_task_stop) {
e3c68b
+            priv->janitor_task_stop = _gf_false;
e3c68b
+            pthread_cond_signal(&priv->janitor_cond);
e3c68b
+            pthread_mutex_unlock(&priv->janitor_mutex);
e3c68b
+            goto out;
e3c68b
+        }
e3c68b
+    }
e3c68b
+    pthread_mutex_unlock(&priv->janitor_mutex);
e3c68b
+
e3c68b
     LOCK(&priv->lock);
e3c68b
     {
e3c68b
         __posix_janitor_timer_start(this);
e3c68b
     }
e3c68b
     UNLOCK(&priv->lock);
e3c68b
 
e3c68b
+out:
e3c68b
     return 0;
e3c68b
 }
e3c68b
 
e3c68b
@@ -1456,6 +1468,9 @@ posix_janitor_task(void *data)
e3c68b
     old_this = THIS;
e3c68b
     THIS = this;
e3c68b
 
e3c68b
+    if (!priv)
e3c68b
+        goto out;
e3c68b
+
e3c68b
     time(&now);
e3c68b
     if ((now - priv->last_landfill_check) > priv->janitor_sleep_duration) {
e3c68b
         if (priv->disable_landfill_purge) {
e3c68b
@@ -1475,6 +1490,7 @@ posix_janitor_task(void *data)
e3c68b
 
e3c68b
     THIS = old_this;
e3c68b
 
e3c68b
+out:
e3c68b
     return 0;
e3c68b
 }
e3c68b
 
e3c68b
diff --git a/xlators/storage/posix/src/posix.h b/xlators/storage/posix/src/posix.h
e3c68b
index b0935a7..64288a7 100644
e3c68b
--- a/xlators/storage/posix/src/posix.h
e3c68b
+++ b/xlators/storage/posix/src/posix.h
e3c68b
@@ -203,6 +203,8 @@ struct posix_private {
e3c68b
     struct list_head fsyncs;
e3c68b
     pthread_mutex_t fsync_mutex;
e3c68b
     pthread_cond_t fsync_cond;
e3c68b
+    pthread_mutex_t janitor_mutex;
e3c68b
+    pthread_cond_t janitor_cond;
e3c68b
     int fsync_queue_count;
e3c68b
 
e3c68b
     enum {
e3c68b
@@ -257,6 +259,7 @@ struct posix_private {
e3c68b
 
e3c68b
     gf_boolean_t fips_mode_rchecksum;
e3c68b
     gf_boolean_t ctime;
e3c68b
+    gf_boolean_t janitor_task_stop;
e3c68b
 };
e3c68b
 
e3c68b
 typedef struct {
e3c68b
-- 
e3c68b
1.8.3.1
e3c68b