14f8ab
From bf24623765817ede84ea47f3265f5e6c2ae17ee7 Mon Sep 17 00:00:00 2001
14f8ab
From: Mohit Agrawal <moagrawal@redhat.com>
14f8ab
Date: Tue, 16 Jul 2019 20:36:57 +0530
14f8ab
Subject: [PATCH 279/284] posix: In brick_mux brick is crashed while start/stop
14f8ab
 volume in loop
14f8ab
14f8ab
Problem: In a brick_mux environment the brick sometimes crashes while a
14f8ab
         volume is stopped/started in a loop. The brick crashes in the janitor
14f8ab
         task when it accesses priv: if posix priv is cleaned up before the
14f8ab
         janitor task is called, the janitor task crashes.
14f8ab
14f8ab
Solution: To avoid the crash in a brick_mux environment, introduce a new
14f8ab
          flag janitor_task_stop in posix_private and, before sending the
14f8ab
          CHILD_DOWN event, wait for janitor_task_done to update the flag.
14f8ab
14f8ab
> Change-Id: Id9fa5d183a463b2b682774ab5cb9868357d139a4
14f8ab
> fixes: bz#1730409
14f8ab
> Signed-off-by: Mohit Agrawal <moagrawal@redhat.com>
14f8ab
> (Cherry picked from commit f138d3fa2237e7fa940ecf17153fd700350c4138)
14f8ab
> (Reviewed on upstream link https://review.gluster.org/#/c/glusterfs/+/23060/)
14f8ab
14f8ab
Change-Id: Id9fa5d183a463b2b682774ab5cb9868357d139a4
14f8ab
fixes: bz#1729971
14f8ab
Signed-off-by: Mohit Agrawal <moagrawal@redhat.com>
14f8ab
Reviewed-on: https://code.engineering.redhat.com/gerrit/178934
14f8ab
Tested-by: Mohit Agrawal <moagrawa@redhat.com>
14f8ab
Tested-by: RHGS Build Bot <nigelb@redhat.com>
14f8ab
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
14f8ab
---
14f8ab
 libglusterfs/src/glusterfs/xlator.h        |  3 +++
14f8ab
 xlators/mgmt/glusterd/src/glusterd-utils.c |  5 ++--
14f8ab
 xlators/protocol/server/src/server.c       |  6 ++++-
14f8ab
 xlators/storage/posix/src/posix-common.c   | 40 +++++++++++++++++++++++++++++-
14f8ab
 xlators/storage/posix/src/posix-helpers.c  | 16 ++++++++++++
14f8ab
 xlators/storage/posix/src/posix.h          |  3 +++
14f8ab
 6 files changed, 69 insertions(+), 4 deletions(-)
14f8ab
14f8ab
diff --git a/libglusterfs/src/glusterfs/xlator.h b/libglusterfs/src/glusterfs/xlator.h
14f8ab
index b78daad..da551e9 100644
14f8ab
--- a/libglusterfs/src/glusterfs/xlator.h
14f8ab
+++ b/libglusterfs/src/glusterfs/xlator.h
14f8ab
@@ -861,6 +861,9 @@ struct _xlator {
14f8ab
 
14f8ab
     /* Flag to notify got CHILD_DOWN event for detach brick */
14f8ab
     uint32_t notify_down;
14f8ab
+
14f8ab
+    /* Flag to avoid throwing a duplicate PARENT_DOWN event */
14f8ab
+    uint32_t parent_down;
14f8ab
 };
14f8ab
 
14f8ab
 /* This would be the only structure which needs to be exported by
14f8ab
diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c
14f8ab
index 2aa975b..812c698 100644
14f8ab
--- a/xlators/mgmt/glusterd/src/glusterd-utils.c
14f8ab
+++ b/xlators/mgmt/glusterd/src/glusterd-utils.c
14f8ab
@@ -4082,8 +4082,9 @@ out:
14f8ab
     if (msg[0]) {
14f8ab
         gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_BRICK_IMPORT_FAIL, "%s",
14f8ab
                msg);
14f8ab
-        gf_event(EVENT_IMPORT_BRICK_FAILED, "peer=%s;brick=%s",
14f8ab
-                 new_brickinfo->hostname, new_brickinfo->path);
14f8ab
+        if (new_brickinfo)
14f8ab
+            gf_event(EVENT_IMPORT_BRICK_FAILED, "peer=%s;brick=%s",
14f8ab
+                     new_brickinfo->hostname, new_brickinfo->path);
14f8ab
     }
14f8ab
     gf_msg_debug("glusterd", 0, "Returning with %d", ret);
14f8ab
     return ret;
14f8ab
diff --git a/xlators/protocol/server/src/server.c b/xlators/protocol/server/src/server.c
14f8ab
index 6ae63ba..a5f09fe 100644
14f8ab
--- a/xlators/protocol/server/src/server.c
14f8ab
+++ b/xlators/protocol/server/src/server.c
14f8ab
@@ -580,6 +580,7 @@ server_graph_janitor_threads(void *data)
14f8ab
     gf_boolean_t victim_found = _gf_false;
14f8ab
     xlator_list_t **trav_p = NULL;
14f8ab
     xlator_t *top = NULL;
14f8ab
+    uint32_t parent_down = 0;
14f8ab
 
14f8ab
     GF_ASSERT(data);
14f8ab
 
14f8ab
@@ -598,7 +599,10 @@ server_graph_janitor_threads(void *data)
14f8ab
         victim = (*trav_p)->xlator;
14f8ab
         if (victim->cleanup_starting &&
14f8ab
             strcmp(victim->name, victim_name) == 0) {
14f8ab
-            victim_found = _gf_true;
14f8ab
+            parent_down = victim->parent_down;
14f8ab
+            victim->parent_down = 1;
14f8ab
+            if (!parent_down)
14f8ab
+                victim_found = _gf_true;
14f8ab
             break;
14f8ab
         }
14f8ab
     }
14f8ab
diff --git a/xlators/storage/posix/src/posix-common.c b/xlators/storage/posix/src/posix-common.c
14f8ab
index d738692..69857d9 100644
14f8ab
--- a/xlators/storage/posix/src/posix-common.c
14f8ab
+++ b/xlators/storage/posix/src/posix-common.c
14f8ab
@@ -146,10 +146,15 @@ int32_t
14f8ab
 posix_notify(xlator_t *this, int32_t event, void *data, ...)
14f8ab
 {
14f8ab
     xlator_t *victim = data;
14f8ab
+    struct posix_private *priv = this->private;
14f8ab
+    int ret = 0;
14f8ab
+    struct timespec sleep_till = {
14f8ab
+        0,
14f8ab
+    };
14f8ab
 
14f8ab
     switch (event) {
14f8ab
         case GF_EVENT_PARENT_UP: {
14f8ab
-            /* Tell the parent that posix xlator is up */
14f8ab
+            /* Tell the parent that posix xlator is up */
14f8ab
             default_notify(this, GF_EVENT_CHILD_UP, data);
14f8ab
         } break;
14f8ab
 
14f8ab
@@ -158,6 +163,31 @@ posix_notify(xlator_t *this, int32_t event, void *data, ...)
14f8ab
                 break;
14f8ab
             gf_log(this->name, GF_LOG_INFO, "Sending CHILD_DOWN for brick %s",
14f8ab
                    victim->name);
14f8ab
+
14f8ab
+            if (priv->janitor) {
14f8ab
+                pthread_mutex_lock(&priv->janitor_mutex);
14f8ab
+                {
14f8ab
+                    priv->janitor_task_stop = _gf_true;
14f8ab
+                    ret = gf_tw_del_timer(this->ctx->tw->timer_wheel,
14f8ab
+                                          priv->janitor);
14f8ab
+                    if (!ret) {
14f8ab
+                        clock_gettime(CLOCK_REALTIME, &sleep_till);
14f8ab
+                        sleep_till.tv_sec += 1;
14f8ab
+                        /* Wait for janitor_task_done to reset the
14f8ab
+                         * janitor_task_stop flag to _gf_false */
14f8ab
+                        while (priv->janitor_task_stop) {
14f8ab
+                            (void)pthread_cond_timedwait(&priv->janitor_cond,
14f8ab
+                                                         &priv->janitor_mutex,
14f8ab
+                                                         &sleep_till);
14f8ab
+                            clock_gettime(CLOCK_REALTIME, &sleep_till);
14f8ab
+                            sleep_till.tv_sec += 1;
14f8ab
+                        }
14f8ab
+                    }
14f8ab
+                }
14f8ab
+                pthread_mutex_unlock(&priv->janitor_mutex);
14f8ab
+                GF_FREE(priv->janitor);
14f8ab
+            }
14f8ab
+            priv->janitor = NULL;
14f8ab
             default_notify(this->parents->xlator, GF_EVENT_CHILD_DOWN, data);
14f8ab
         } break;
14f8ab
         default:
14f8ab
@@ -1008,6 +1038,8 @@ posix_init(xlator_t *this)
14f8ab
 
14f8ab
     pthread_mutex_init(&_private->fsync_mutex, NULL);
14f8ab
     pthread_cond_init(&_private->fsync_cond, NULL);
14f8ab
+    pthread_mutex_init(&_private->janitor_mutex, NULL);
14f8ab
+    pthread_cond_init(&_private->janitor_cond, NULL);
14f8ab
     INIT_LIST_HEAD(&_private->fsyncs);
14f8ab
     ret = posix_spawn_ctx_janitor_thread(this);
14f8ab
     if (ret)
14f8ab
@@ -1128,6 +1160,7 @@ posix_fini(xlator_t *this)
14f8ab
         (void)gf_thread_cleanup_xint(priv->disk_space_check);
14f8ab
         priv->disk_space_check = 0;
14f8ab
     }
14f8ab
+
14f8ab
     if (priv->janitor) {
14f8ab
         /*TODO: Make sure the synctask is also complete */
14f8ab
         ret = gf_tw_del_timer(this->ctx->tw->timer_wheel, priv->janitor);
14f8ab
@@ -1135,8 +1168,10 @@ posix_fini(xlator_t *this)
14f8ab
             gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_TIMER_DELETE_FAILED,
14f8ab
                    "Failed to delete janitor timer");
14f8ab
         }
14f8ab
+        GF_FREE(priv->janitor);
14f8ab
         priv->janitor = NULL;
14f8ab
     }
14f8ab
+
14f8ab
     if (priv->fsyncer) {
14f8ab
         (void)gf_thread_cleanup_xint(priv->fsyncer);
14f8ab
         priv->fsyncer = 0;
14f8ab
@@ -1148,6 +1183,9 @@ posix_fini(xlator_t *this)
14f8ab
     GF_FREE(priv->base_path);
14f8ab
     LOCK_DESTROY(&priv->lock);
14f8ab
     pthread_mutex_destroy(&priv->fsync_mutex);
14f8ab
+    pthread_cond_destroy(&priv->fsync_cond);
14f8ab
+    pthread_mutex_destroy(&priv->janitor_mutex);
14f8ab
+    pthread_cond_destroy(&priv->janitor_cond);
14f8ab
     GF_FREE(priv->hostname);
14f8ab
     GF_FREE(priv->trash_path);
14f8ab
     GF_FREE(priv);
14f8ab
diff --git a/xlators/storage/posix/src/posix-helpers.c b/xlators/storage/posix/src/posix-helpers.c
14f8ab
index 07169b5..ef5bfd5 100644
14f8ab
--- a/xlators/storage/posix/src/posix-helpers.c
14f8ab
+++ b/xlators/storage/posix/src/posix-helpers.c
14f8ab
@@ -1432,12 +1432,24 @@ posix_janitor_task_done(int ret, call_frame_t *frame, void *data)
14f8ab
     this = data;
14f8ab
     priv = this->private;
14f8ab
 
14f8ab
+    pthread_mutex_lock(&priv->janitor_mutex);
14f8ab
+    {
14f8ab
+        if (priv->janitor_task_stop) {
14f8ab
+            priv->janitor_task_stop = _gf_false;
14f8ab
+            pthread_cond_signal(&priv->janitor_cond);
14f8ab
+            pthread_mutex_unlock(&priv->janitor_mutex);
14f8ab
+            goto out;
14f8ab
+        }
14f8ab
+    }
14f8ab
+    pthread_mutex_unlock(&priv->janitor_mutex);
14f8ab
+
14f8ab
     LOCK(&priv->lock);
14f8ab
     {
14f8ab
         __posix_janitor_timer_start(this);
14f8ab
     }
14f8ab
     UNLOCK(&priv->lock);
14f8ab
 
14f8ab
+out:
14f8ab
     return 0;
14f8ab
 }
14f8ab
 
14f8ab
@@ -1456,6 +1468,9 @@ posix_janitor_task(void *data)
14f8ab
     old_this = THIS;
14f8ab
     THIS = this;
14f8ab
 
14f8ab
+    if (!priv)
14f8ab
+        goto out;
14f8ab
+
14f8ab
     time(&now);
14f8ab
     if ((now - priv->last_landfill_check) > priv->janitor_sleep_duration) {
14f8ab
         if (priv->disable_landfill_purge) {
14f8ab
@@ -1475,6 +1490,7 @@ posix_janitor_task(void *data)
14f8ab
 
14f8ab
     THIS = old_this;
14f8ab
 
14f8ab
+out:
14f8ab
     return 0;
14f8ab
 }
14f8ab
 
14f8ab
diff --git a/xlators/storage/posix/src/posix.h b/xlators/storage/posix/src/posix.h
14f8ab
index b0935a7..64288a7 100644
14f8ab
--- a/xlators/storage/posix/src/posix.h
14f8ab
+++ b/xlators/storage/posix/src/posix.h
14f8ab
@@ -203,6 +203,8 @@ struct posix_private {
14f8ab
     struct list_head fsyncs;
14f8ab
     pthread_mutex_t fsync_mutex;
14f8ab
     pthread_cond_t fsync_cond;
14f8ab
+    pthread_mutex_t janitor_mutex;
14f8ab
+    pthread_cond_t janitor_cond;
14f8ab
     int fsync_queue_count;
14f8ab
 
14f8ab
     enum {
14f8ab
@@ -257,6 +259,7 @@ struct posix_private {
14f8ab
 
14f8ab
     gf_boolean_t fips_mode_rchecksum;
14f8ab
     gf_boolean_t ctime;
14f8ab
+    gf_boolean_t janitor_task_stop;
14f8ab
 };
14f8ab
 
14f8ab
 typedef struct {
14f8ab
-- 
14f8ab
1.8.3.1
14f8ab