50dc83
From bf24623765817ede84ea47f3265f5e6c2ae17ee7 Mon Sep 17 00:00:00 2001
50dc83
From: Mohit Agrawal <moagrawal@redhat.com>
50dc83
Date: Tue, 16 Jul 2019 20:36:57 +0530
50dc83
Subject: [PATCH 279/284] posix: In brick_mux brick is crashed while start/stop
50dc83
 volume in loop
50dc83
50dc83
Problem: In brick_mux environment sometime brick is crashed while
50dc83
         volume stop/start in a loop.Brick is crashed in janitor task
50dc83
         at the time of accessing priv.If posix priv is cleaned up before
50dc83
         call janitor task then janitor task is crashed.
50dc83
50dc83
Solution: To avoid the crash in brick_mux environment introduce a new
50dc83
          flag janitor_task_stop in posix_private and before send CHILD_DOWN event
50dc83
          wait for update the flag by janitor_task_done
50dc83
50dc83
> Change-Id: Id9fa5d183a463b2b682774ab5cb9868357d139a4
50dc83
> fixes: bz#1730409
50dc83
> Signed-off-by: Mohit Agrawal <moagrawal@redhat.com>
50dc83
> (Cherry picked from commit f138d3fa2237e7fa940ecf17153fd700350c4138)
50dc83
> (Reviewed on upstream link https://review.gluster.org/#/c/glusterfs/+/23060/)
50dc83
50dc83
Change-Id: Id9fa5d183a463b2b682774ab5cb9868357d139a4
50dc83
fixex: bz#1729971
50dc83
Signed-off-by: Mohit Agrawal <moagrawal@redhat.com>
50dc83
Reviewed-on: https://code.engineering.redhat.com/gerrit/178934
50dc83
Tested-by: Mohit Agrawal <moagrawa@redhat.com>
50dc83
Tested-by: RHGS Build Bot <nigelb@redhat.com>
50dc83
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
50dc83
---
50dc83
 libglusterfs/src/glusterfs/xlator.h        |  3 +++
50dc83
 xlators/mgmt/glusterd/src/glusterd-utils.c |  5 ++--
50dc83
 xlators/protocol/server/src/server.c       |  6 ++++-
50dc83
 xlators/storage/posix/src/posix-common.c   | 40 +++++++++++++++++++++++++++++-
50dc83
 xlators/storage/posix/src/posix-helpers.c  | 16 ++++++++++++
50dc83
 xlators/storage/posix/src/posix.h          |  3 +++
50dc83
 6 files changed, 69 insertions(+), 4 deletions(-)
50dc83
50dc83
diff --git a/libglusterfs/src/glusterfs/xlator.h b/libglusterfs/src/glusterfs/xlator.h
50dc83
index b78daad..da551e9 100644
50dc83
--- a/libglusterfs/src/glusterfs/xlator.h
50dc83
+++ b/libglusterfs/src/glusterfs/xlator.h
50dc83
@@ -861,6 +861,9 @@ struct _xlator {
50dc83
 
50dc83
     /* Flag to notify got CHILD_DOWN event for detach brick */
50dc83
     uint32_t notify_down;
50dc83
+
50dc83
+    /* Flag to avoid throw duplicate PARENT_DOWN event */
50dc83
+    uint32_t parent_down;
50dc83
 };
50dc83
 
50dc83
 /* This would be the only structure which needs to be exported by
50dc83
diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c
50dc83
index 2aa975b..812c698 100644
50dc83
--- a/xlators/mgmt/glusterd/src/glusterd-utils.c
50dc83
+++ b/xlators/mgmt/glusterd/src/glusterd-utils.c
50dc83
@@ -4082,8 +4082,9 @@ out:
50dc83
     if (msg[0]) {
50dc83
         gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_BRICK_IMPORT_FAIL, "%s",
50dc83
                msg);
50dc83
-        gf_event(EVENT_IMPORT_BRICK_FAILED, "peer=%s;brick=%s",
50dc83
-                 new_brickinfo->hostname, new_brickinfo->path);
50dc83
+        if (new_brickinfo)
50dc83
+            gf_event(EVENT_IMPORT_BRICK_FAILED, "peer=%s;brick=%s",
50dc83
+                     new_brickinfo->hostname, new_brickinfo->path);
50dc83
     }
50dc83
     gf_msg_debug("glusterd", 0, "Returning with %d", ret);
50dc83
     return ret;
50dc83
diff --git a/xlators/protocol/server/src/server.c b/xlators/protocol/server/src/server.c
50dc83
index 6ae63ba..a5f09fe 100644
50dc83
--- a/xlators/protocol/server/src/server.c
50dc83
+++ b/xlators/protocol/server/src/server.c
50dc83
@@ -580,6 +580,7 @@ server_graph_janitor_threads(void *data)
50dc83
     gf_boolean_t victim_found = _gf_false;
50dc83
     xlator_list_t **trav_p = NULL;
50dc83
     xlator_t *top = NULL;
50dc83
+    uint32_t parent_down = 0;
50dc83
 
50dc83
     GF_ASSERT(data);
50dc83
 
50dc83
@@ -598,7 +599,10 @@ server_graph_janitor_threads(void *data)
50dc83
         victim = (*trav_p)->xlator;
50dc83
         if (victim->cleanup_starting &&
50dc83
             strcmp(victim->name, victim_name) == 0) {
50dc83
-            victim_found = _gf_true;
50dc83
+            parent_down = victim->parent_down;
50dc83
+            victim->parent_down = 1;
50dc83
+            if (!parent_down)
50dc83
+                victim_found = _gf_true;
50dc83
             break;
50dc83
         }
50dc83
     }
50dc83
diff --git a/xlators/storage/posix/src/posix-common.c b/xlators/storage/posix/src/posix-common.c
50dc83
index d738692..69857d9 100644
50dc83
--- a/xlators/storage/posix/src/posix-common.c
50dc83
+++ b/xlators/storage/posix/src/posix-common.c
50dc83
@@ -146,10 +146,15 @@ int32_t
50dc83
 posix_notify(xlator_t *this, int32_t event, void *data, ...)
50dc83
 {
50dc83
     xlator_t *victim = data;
50dc83
+    struct posix_private *priv = this->private;
50dc83
+    int ret = 0;
50dc83
+    struct timespec sleep_till = {
50dc83
+        0,
50dc83
+    };
50dc83
 
50dc83
     switch (event) {
50dc83
         case GF_EVENT_PARENT_UP: {
50dc83
-            /* Tell the parent that posix xlator is up */
50dc83
+            /* the parent that posix xlator is up */
50dc83
             default_notify(this, GF_EVENT_CHILD_UP, data);
50dc83
         } break;
50dc83
 
50dc83
@@ -158,6 +163,31 @@ posix_notify(xlator_t *this, int32_t event, void *data, ...)
50dc83
                 break;
50dc83
             gf_log(this->name, GF_LOG_INFO, "Sending CHILD_DOWN for brick %s",
50dc83
                    victim->name);
50dc83
+
50dc83
+            if (priv->janitor) {
50dc83
+                pthread_mutex_lock(&priv->janitor_mutex);
50dc83
+                {
50dc83
+                    priv->janitor_task_stop = _gf_true;
50dc83
+                    ret = gf_tw_del_timer(this->ctx->tw->timer_wheel,
50dc83
+                                          priv->janitor);
50dc83
+                    if (!ret) {
50dc83
+                        clock_gettime(CLOCK_REALTIME, &sleep_till);
50dc83
+                        sleep_till.tv_sec += 1;
50dc83
+                        /* Wait to set janitor_task flag to _gf_false by
50dc83
+                         * janitor_task_done */
50dc83
+                        while (priv->janitor_task_stop) {
50dc83
+                            (void)pthread_cond_timedwait(&priv->janitor_cond,
50dc83
+                                                         &priv->janitor_mutex,
50dc83
+                                                         &sleep_till);
50dc83
+                            clock_gettime(CLOCK_REALTIME, &sleep_till);
50dc83
+                            sleep_till.tv_sec += 1;
50dc83
+                        }
50dc83
+                    }
50dc83
+                }
50dc83
+                pthread_mutex_unlock(&priv->janitor_mutex);
50dc83
+                GF_FREE(priv->janitor);
50dc83
+            }
50dc83
+            priv->janitor = NULL;
50dc83
             default_notify(this->parents->xlator, GF_EVENT_CHILD_DOWN, data);
50dc83
         } break;
50dc83
         default:
50dc83
@@ -1008,6 +1038,8 @@ posix_init(xlator_t *this)
50dc83
 
50dc83
     pthread_mutex_init(&_private->fsync_mutex, NULL);
50dc83
     pthread_cond_init(&_private->fsync_cond, NULL);
50dc83
+    pthread_mutex_init(&_private->janitor_mutex, NULL);
50dc83
+    pthread_cond_init(&_private->janitor_cond, NULL);
50dc83
     INIT_LIST_HEAD(&_private->fsyncs);
50dc83
     ret = posix_spawn_ctx_janitor_thread(this);
50dc83
     if (ret)
50dc83
@@ -1128,6 +1160,7 @@ posix_fini(xlator_t *this)
50dc83
         (void)gf_thread_cleanup_xint(priv->disk_space_check);
50dc83
         priv->disk_space_check = 0;
50dc83
     }
50dc83
+
50dc83
     if (priv->janitor) {
50dc83
         /*TODO: Make sure the synctask is also complete */
50dc83
         ret = gf_tw_del_timer(this->ctx->tw->timer_wheel, priv->janitor);
50dc83
@@ -1135,8 +1168,10 @@ posix_fini(xlator_t *this)
50dc83
             gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_TIMER_DELETE_FAILED,
50dc83
                    "Failed to delete janitor timer");
50dc83
         }
50dc83
+        GF_FREE(priv->janitor);
50dc83
         priv->janitor = NULL;
50dc83
     }
50dc83
+
50dc83
     if (priv->fsyncer) {
50dc83
         (void)gf_thread_cleanup_xint(priv->fsyncer);
50dc83
         priv->fsyncer = 0;
50dc83
@@ -1148,6 +1183,9 @@ posix_fini(xlator_t *this)
50dc83
     GF_FREE(priv->base_path);
50dc83
     LOCK_DESTROY(&priv->lock);
50dc83
     pthread_mutex_destroy(&priv->fsync_mutex);
50dc83
+    pthread_cond_destroy(&priv->fsync_cond);
50dc83
+    pthread_mutex_destroy(&priv->janitor_mutex);
50dc83
+    pthread_cond_destroy(&priv->janitor_cond);
50dc83
     GF_FREE(priv->hostname);
50dc83
     GF_FREE(priv->trash_path);
50dc83
     GF_FREE(priv);
50dc83
diff --git a/xlators/storage/posix/src/posix-helpers.c b/xlators/storage/posix/src/posix-helpers.c
50dc83
index 07169b5..ef5bfd5 100644
50dc83
--- a/xlators/storage/posix/src/posix-helpers.c
50dc83
+++ b/xlators/storage/posix/src/posix-helpers.c
50dc83
@@ -1432,12 +1432,24 @@ posix_janitor_task_done(int ret, call_frame_t *frame, void *data)
50dc83
     this = data;
50dc83
     priv = this->private;
50dc83
 
50dc83
+    pthread_mutex_lock(&priv->janitor_mutex);
50dc83
+    {
50dc83
+        if (priv->janitor_task_stop) {
50dc83
+            priv->janitor_task_stop = _gf_false;
50dc83
+            pthread_cond_signal(&priv->janitor_cond);
50dc83
+            pthread_mutex_unlock(&priv->janitor_mutex);
50dc83
+            goto out;
50dc83
+        }
50dc83
+    }
50dc83
+    pthread_mutex_unlock(&priv->janitor_mutex);
50dc83
+
50dc83
     LOCK(&priv->lock);
50dc83
     {
50dc83
         __posix_janitor_timer_start(this);
50dc83
     }
50dc83
     UNLOCK(&priv->lock);
50dc83
 
50dc83
+out:
50dc83
     return 0;
50dc83
 }
50dc83
 
50dc83
@@ -1456,6 +1468,9 @@ posix_janitor_task(void *data)
50dc83
     old_this = THIS;
50dc83
     THIS = this;
50dc83
 
50dc83
+    if (!priv)
50dc83
+        goto out;
50dc83
+
50dc83
     time(&now;;
50dc83
     if ((now - priv->last_landfill_check) > priv->janitor_sleep_duration) {
50dc83
         if (priv->disable_landfill_purge) {
50dc83
@@ -1475,6 +1490,7 @@ posix_janitor_task(void *data)
50dc83
 
50dc83
     THIS = old_this;
50dc83
 
50dc83
+out:
50dc83
     return 0;
50dc83
 }
50dc83
 
50dc83
diff --git a/xlators/storage/posix/src/posix.h b/xlators/storage/posix/src/posix.h
50dc83
index b0935a7..64288a7 100644
50dc83
--- a/xlators/storage/posix/src/posix.h
50dc83
+++ b/xlators/storage/posix/src/posix.h
50dc83
@@ -203,6 +203,8 @@ struct posix_private {
50dc83
     struct list_head fsyncs;
50dc83
     pthread_mutex_t fsync_mutex;
50dc83
     pthread_cond_t fsync_cond;
50dc83
+    pthread_mutex_t janitor_mutex;
50dc83
+    pthread_cond_t janitor_cond;
50dc83
     int fsync_queue_count;
50dc83
 
50dc83
     enum {
50dc83
@@ -257,6 +259,7 @@ struct posix_private {
50dc83
 
50dc83
     gf_boolean_t fips_mode_rchecksum;
50dc83
     gf_boolean_t ctime;
50dc83
+    gf_boolean_t janitor_task_stop;
50dc83
 };
50dc83
 
50dc83
 typedef struct {
50dc83
-- 
50dc83
1.8.3.1
50dc83