74b1de
From bf24623765817ede84ea47f3265f5e6c2ae17ee7 Mon Sep 17 00:00:00 2001
74b1de
From: Mohit Agrawal <moagrawal@redhat.com>
74b1de
Date: Tue, 16 Jul 2019 20:36:57 +0530
74b1de
Subject: [PATCH 279/284] posix: In brick_mux brick is crashed while start/stop
74b1de
 volume in loop
74b1de
74b1de
Problem: In a brick_mux environment a brick sometimes crashes while a
74b1de
         volume is stopped/started in a loop. The brick crashes in the janitor
74b1de
         task when it accesses priv: if the posix priv is cleaned up before
74b1de
         the janitor task is called, the janitor task crashes.
74b1de
74b1de
Solution: To avoid the crash in a brick_mux environment, introduce a new
74b1de
          flag janitor_task_stop in posix_private and, before sending the
74b1de
          CHILD_DOWN event, wait for janitor_task_done to update the flag.
74b1de
74b1de
> Change-Id: Id9fa5d183a463b2b682774ab5cb9868357d139a4
74b1de
> fixes: bz#1730409
74b1de
> Signed-off-by: Mohit Agrawal <moagrawal@redhat.com>
74b1de
> (Cherry picked from commit f138d3fa2237e7fa940ecf17153fd700350c4138)
74b1de
> (Reviewed on upstream link https://review.gluster.org/#/c/glusterfs/+/23060/)
74b1de
74b1de
Change-Id: Id9fa5d183a463b2b682774ab5cb9868357d139a4
74b1de
fixes: bz#1729971
74b1de
Signed-off-by: Mohit Agrawal <moagrawal@redhat.com>
74b1de
Reviewed-on: https://code.engineering.redhat.com/gerrit/178934
74b1de
Tested-by: Mohit Agrawal <moagrawa@redhat.com>
74b1de
Tested-by: RHGS Build Bot <nigelb@redhat.com>
74b1de
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
74b1de
---
74b1de
 libglusterfs/src/glusterfs/xlator.h        |  3 +++
74b1de
 xlators/mgmt/glusterd/src/glusterd-utils.c |  5 ++--
74b1de
 xlators/protocol/server/src/server.c       |  6 ++++-
74b1de
 xlators/storage/posix/src/posix-common.c   | 40 +++++++++++++++++++++++++++++-
74b1de
 xlators/storage/posix/src/posix-helpers.c  | 16 ++++++++++++
74b1de
 xlators/storage/posix/src/posix.h          |  3 +++
74b1de
 6 files changed, 69 insertions(+), 4 deletions(-)
74b1de
74b1de
diff --git a/libglusterfs/src/glusterfs/xlator.h b/libglusterfs/src/glusterfs/xlator.h
74b1de
index b78daad..da551e9 100644
74b1de
--- a/libglusterfs/src/glusterfs/xlator.h
74b1de
+++ b/libglusterfs/src/glusterfs/xlator.h
74b1de
@@ -861,6 +861,9 @@ struct _xlator {
74b1de
 
74b1de
     /* Flag to notify got CHILD_DOWN event for detach brick */
74b1de
     uint32_t notify_down;
74b1de
+
74b1de
+    /* Flag to avoid sending duplicate PARENT_DOWN events */
74b1de
+    uint32_t parent_down;
74b1de
 };
74b1de
 
74b1de
 /* This would be the only structure which needs to be exported by
74b1de
diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c
74b1de
index 2aa975b..812c698 100644
74b1de
--- a/xlators/mgmt/glusterd/src/glusterd-utils.c
74b1de
+++ b/xlators/mgmt/glusterd/src/glusterd-utils.c
74b1de
@@ -4082,8 +4082,9 @@ out:
74b1de
     if (msg[0]) {
74b1de
         gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_BRICK_IMPORT_FAIL, "%s",
74b1de
                msg);
74b1de
-        gf_event(EVENT_IMPORT_BRICK_FAILED, "peer=%s;brick=%s",
74b1de
-                 new_brickinfo->hostname, new_brickinfo->path);
74b1de
+        if (new_brickinfo)
74b1de
+            gf_event(EVENT_IMPORT_BRICK_FAILED, "peer=%s;brick=%s",
74b1de
+                     new_brickinfo->hostname, new_brickinfo->path);
74b1de
     }
74b1de
     gf_msg_debug("glusterd", 0, "Returning with %d", ret);
74b1de
     return ret;
74b1de
diff --git a/xlators/protocol/server/src/server.c b/xlators/protocol/server/src/server.c
74b1de
index 6ae63ba..a5f09fe 100644
74b1de
--- a/xlators/protocol/server/src/server.c
74b1de
+++ b/xlators/protocol/server/src/server.c
74b1de
@@ -580,6 +580,7 @@ server_graph_janitor_threads(void *data)
74b1de
     gf_boolean_t victim_found = _gf_false;
74b1de
     xlator_list_t **trav_p = NULL;
74b1de
     xlator_t *top = NULL;
74b1de
+    uint32_t parent_down = 0;
74b1de
 
74b1de
     GF_ASSERT(data);
74b1de
 
74b1de
@@ -598,7 +599,10 @@ server_graph_janitor_threads(void *data)
74b1de
         victim = (*trav_p)->xlator;
74b1de
         if (victim->cleanup_starting &&
74b1de
             strcmp(victim->name, victim_name) == 0) {
74b1de
-            victim_found = _gf_true;
74b1de
+            parent_down = victim->parent_down;
74b1de
+            victim->parent_down = 1;
74b1de
+            if (!parent_down)
74b1de
+                victim_found = _gf_true;
74b1de
             break;
74b1de
         }
74b1de
     }
74b1de
diff --git a/xlators/storage/posix/src/posix-common.c b/xlators/storage/posix/src/posix-common.c
74b1de
index d738692..69857d9 100644
74b1de
--- a/xlators/storage/posix/src/posix-common.c
74b1de
+++ b/xlators/storage/posix/src/posix-common.c
74b1de
@@ -146,10 +146,15 @@ int32_t
74b1de
 posix_notify(xlator_t *this, int32_t event, void *data, ...)
74b1de
 {
74b1de
     xlator_t *victim = data;
74b1de
+    struct posix_private *priv = this->private;
74b1de
+    int ret = 0;
74b1de
+    struct timespec sleep_till = {
74b1de
+        0,
74b1de
+    };
74b1de
 
74b1de
     switch (event) {
74b1de
         case GF_EVENT_PARENT_UP: {
74b1de
-            /* Tell the parent that posix xlator is up */
74b1de
+            /* Tell the parent that posix xlator is up */
74b1de
             default_notify(this, GF_EVENT_CHILD_UP, data);
74b1de
         } break;
74b1de
 
74b1de
@@ -158,6 +163,31 @@ posix_notify(xlator_t *this, int32_t event, void *data, ...)
74b1de
                 break;
74b1de
             gf_log(this->name, GF_LOG_INFO, "Sending CHILD_DOWN for brick %s",
74b1de
                    victim->name);
74b1de
+
74b1de
+            if (priv->janitor) {
74b1de
+                pthread_mutex_lock(&priv->janitor_mutex);
74b1de
+                {
74b1de
+                    priv->janitor_task_stop = _gf_true;
74b1de
+                    ret = gf_tw_del_timer(this->ctx->tw->timer_wheel,
74b1de
+                                          priv->janitor);
74b1de
+                    if (!ret) {
74b1de
+                        clock_gettime(CLOCK_REALTIME, &sleep_till);
74b1de
+                        sleep_till.tv_sec += 1;
74b1de
+                        /* Wait for posix_janitor_task_done to reset
74b1de
+                         * janitor_task_stop to _gf_false */
74b1de
+                        while (priv->janitor_task_stop) {
74b1de
+                            (void)pthread_cond_timedwait(&priv->janitor_cond,
74b1de
+                                                         &priv->janitor_mutex,
74b1de
+                                                         &sleep_till);
74b1de
+                            clock_gettime(CLOCK_REALTIME, &sleep_till);
74b1de
+                            sleep_till.tv_sec += 1;
74b1de
+                        }
74b1de
+                    }
74b1de
+                }
74b1de
+                pthread_mutex_unlock(&priv->janitor_mutex);
74b1de
+                GF_FREE(priv->janitor);
74b1de
+            }
74b1de
+            priv->janitor = NULL;
74b1de
             default_notify(this->parents->xlator, GF_EVENT_CHILD_DOWN, data);
74b1de
         } break;
74b1de
         default:
74b1de
@@ -1008,6 +1038,8 @@ posix_init(xlator_t *this)
74b1de
 
74b1de
     pthread_mutex_init(&_private->fsync_mutex, NULL);
74b1de
     pthread_cond_init(&_private->fsync_cond, NULL);
74b1de
+    pthread_mutex_init(&_private->janitor_mutex, NULL);
74b1de
+    pthread_cond_init(&_private->janitor_cond, NULL);
74b1de
     INIT_LIST_HEAD(&_private->fsyncs);
74b1de
     ret = posix_spawn_ctx_janitor_thread(this);
74b1de
     if (ret)
74b1de
@@ -1128,6 +1160,7 @@ posix_fini(xlator_t *this)
74b1de
         (void)gf_thread_cleanup_xint(priv->disk_space_check);
74b1de
         priv->disk_space_check = 0;
74b1de
     }
74b1de
+
74b1de
     if (priv->janitor) {
74b1de
         /*TODO: Make sure the synctask is also complete */
74b1de
         ret = gf_tw_del_timer(this->ctx->tw->timer_wheel, priv->janitor);
74b1de
@@ -1135,8 +1168,10 @@ posix_fini(xlator_t *this)
74b1de
             gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_TIMER_DELETE_FAILED,
74b1de
                    "Failed to delete janitor timer");
74b1de
         }
74b1de
+        GF_FREE(priv->janitor);
74b1de
         priv->janitor = NULL;
74b1de
     }
74b1de
+
74b1de
     if (priv->fsyncer) {
74b1de
         (void)gf_thread_cleanup_xint(priv->fsyncer);
74b1de
         priv->fsyncer = 0;
74b1de
@@ -1148,6 +1183,9 @@ posix_fini(xlator_t *this)
74b1de
     GF_FREE(priv->base_path);
74b1de
     LOCK_DESTROY(&priv->lock);
74b1de
     pthread_mutex_destroy(&priv->fsync_mutex);
74b1de
+    pthread_cond_destroy(&priv->fsync_cond);
74b1de
+    pthread_mutex_destroy(&priv->janitor_mutex);
74b1de
+    pthread_cond_destroy(&priv->janitor_cond);
74b1de
     GF_FREE(priv->hostname);
74b1de
     GF_FREE(priv->trash_path);
74b1de
     GF_FREE(priv);
74b1de
diff --git a/xlators/storage/posix/src/posix-helpers.c b/xlators/storage/posix/src/posix-helpers.c
74b1de
index 07169b5..ef5bfd5 100644
74b1de
--- a/xlators/storage/posix/src/posix-helpers.c
74b1de
+++ b/xlators/storage/posix/src/posix-helpers.c
74b1de
@@ -1432,12 +1432,24 @@ posix_janitor_task_done(int ret, call_frame_t *frame, void *data)
74b1de
     this = data;
74b1de
     priv = this->private;
74b1de
 
74b1de
+    pthread_mutex_lock(&priv->janitor_mutex);
74b1de
+    {
74b1de
+        if (priv->janitor_task_stop) {
74b1de
+            priv->janitor_task_stop = _gf_false;
74b1de
+            pthread_cond_signal(&priv->janitor_cond);
74b1de
+            pthread_mutex_unlock(&priv->janitor_mutex);
74b1de
+            goto out;
74b1de
+        }
74b1de
+    }
74b1de
+    pthread_mutex_unlock(&priv->janitor_mutex);
74b1de
+
74b1de
     LOCK(&priv->lock);
74b1de
     {
74b1de
         __posix_janitor_timer_start(this);
74b1de
     }
74b1de
     UNLOCK(&priv->lock);
74b1de
 
74b1de
+out:
74b1de
     return 0;
74b1de
 }
74b1de
 
74b1de
@@ -1456,6 +1468,9 @@ posix_janitor_task(void *data)
74b1de
     old_this = THIS;
74b1de
     THIS = this;
74b1de
 
74b1de
+    if (!priv)
74b1de
+        goto out;
74b1de
+
74b1de
     time(&now);
74b1de
     if ((now - priv->last_landfill_check) > priv->janitor_sleep_duration) {
74b1de
         if (priv->disable_landfill_purge) {
74b1de
@@ -1475,6 +1490,7 @@ posix_janitor_task(void *data)
74b1de
 
74b1de
     THIS = old_this;
74b1de
 
74b1de
+out:
74b1de
     return 0;
74b1de
 }
74b1de
 
74b1de
diff --git a/xlators/storage/posix/src/posix.h b/xlators/storage/posix/src/posix.h
74b1de
index b0935a7..64288a7 100644
74b1de
--- a/xlators/storage/posix/src/posix.h
74b1de
+++ b/xlators/storage/posix/src/posix.h
74b1de
@@ -203,6 +203,8 @@ struct posix_private {
74b1de
     struct list_head fsyncs;
74b1de
     pthread_mutex_t fsync_mutex;
74b1de
     pthread_cond_t fsync_cond;
74b1de
+    pthread_mutex_t janitor_mutex;
74b1de
+    pthread_cond_t janitor_cond;
74b1de
     int fsync_queue_count;
74b1de
 
74b1de
     enum {
74b1de
@@ -257,6 +259,7 @@ struct posix_private {
74b1de
 
74b1de
     gf_boolean_t fips_mode_rchecksum;
74b1de
     gf_boolean_t ctime;
74b1de
+    gf_boolean_t janitor_task_stop;
74b1de
 };
74b1de
 
74b1de
 typedef struct {
74b1de
-- 
74b1de
1.8.3.1
74b1de