887953
From 9b26837434977601f1e48477904486ea032f742a Mon Sep 17 00:00:00 2001
887953
From: Mohit Agrawal <moagrawa@redhat.com>
887953
Date: Mon, 8 Oct 2018 22:32:28 +0530
887953
Subject: [PATCH 390/399] core: glusterfsd keeping fd open in index xlator
887953
887953
Problem: Current resource cleanup sequence is not
887953
         perfect while brick mux is enabled
887953
887953
Solution: 1. Destroy the xprt only after all fds associated
887953
             with a client have been cleaned up
887953
          2. Before calling fini for brick xlators, ensure no stub
887953
             is still running on the brick
887953
887953
> Change-Id: I86195785e428f57d3ef0da3e4061021fafacd435
887953
> fixes: bz#1631357
887953
> (cherry picked from commit 5bc4594dabc08fd4de1940c044946e33037f2ac7)
887953
> (Reviewed on upstream link https://review.gluster.org/#/c/glusterfs/+/21235/)
887953
887953
Change-Id: I0676a2f8e42557c1107a877406e255b93a77a269
887953
BUG: 1631372
887953
Signed-off-by: Mohit Agrawal <moagrawa@redhat.com>
887953
Reviewed-on: https://code.engineering.redhat.com/gerrit/152170
887953
Tested-by: RHGS Build Bot <nigelb@redhat.com>
887953
Reviewed-by: Raghavendra Gowdappa <rgowdapp@redhat.com>
887953
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
887953
---
887953
 libglusterfs/src/defaults-tmpl.c                |   8 +-
887953
 libglusterfs/src/xlator.c                       |   2 +
887953
 libglusterfs/src/xlator.h                       |   7 +
887953
 xlators/features/index/src/index.c              |  50 ++++++-
887953
 xlators/features/index/src/index.h              |   2 +
887953
 xlators/performance/io-threads/src/io-threads.c |  45 ++++++-
887953
 xlators/performance/io-threads/src/io-threads.h |   2 +-
887953
 xlators/protocol/server/src/server-handshake.c  |  23 +++-
887953
 xlators/protocol/server/src/server-helpers.c    |  79 +++++++++--
887953
 xlators/protocol/server/src/server-helpers.h    |   2 +-
887953
 xlators/protocol/server/src/server.c            | 171 ++++++++++++++++--------
887953
 xlators/protocol/server/src/server.h            |   9 +-
887953
 xlators/storage/posix/src/posix.c               |  11 ++
887953
 13 files changed, 333 insertions(+), 78 deletions(-)
887953
887953
diff --git a/libglusterfs/src/defaults-tmpl.c b/libglusterfs/src/defaults-tmpl.c
887953
index 0ef14d5..3fdeabb 100644
887953
--- a/libglusterfs/src/defaults-tmpl.c
887953
+++ b/libglusterfs/src/defaults-tmpl.c
887953
@@ -119,6 +119,8 @@ default_release (xlator_t *this, fd_t *fd)
887953
 int
887953
 default_notify (xlator_t *this, int32_t event, void *data, ...)
887953
 {
887953
+        xlator_t *victim = data;
887953
+
887953
         switch (event) {
887953
         case GF_EVENT_PARENT_UP:
887953
         case GF_EVENT_PARENT_DOWN:
887953
@@ -126,7 +128,11 @@ default_notify (xlator_t *this, int32_t event, void *data, ...)
887953
                 xlator_list_t *list = this->children;
887953
 
887953
                 while (list) {
887953
-                        xlator_notify (list->xlator, event, this);
887953
+                        if (victim && victim->cleanup_starting) {
887953
+                                xlator_notify(list->xlator, event, victim);
887953
+                        } else {
887953
+                                xlator_notify(list->xlator, event, this);
887953
+                        }
887953
                         list = list->next;
887953
                 }
887953
         }
887953
diff --git a/libglusterfs/src/xlator.c b/libglusterfs/src/xlator.c
887953
index 1cf4a63..8aa8aa1 100644
887953
--- a/libglusterfs/src/xlator.c
887953
+++ b/libglusterfs/src/xlator.c
887953
@@ -489,6 +489,8 @@ xlator_init (xlator_t *xl)
887953
                 xl->mem_acct_init (xl);
887953
 
887953
         xl->instance_name = NULL;
887953
+        GF_ATOMIC_INIT(xl->xprtrefcnt, 0);
887953
+        GF_ATOMIC_INIT(xl->fd_cnt, 0);
887953
         if (!xl->init) {
887953
                 gf_msg (xl->name, GF_LOG_WARNING, 0, LG_MSG_INIT_FAILED,
887953
                         "No init() found");
887953
diff --git a/libglusterfs/src/xlator.h b/libglusterfs/src/xlator.h
887953
index 7434da8..1879641 100644
887953
--- a/libglusterfs/src/xlator.h
887953
+++ b/libglusterfs/src/xlator.h
887953
@@ -965,7 +965,14 @@ struct _xlator {
887953
         /* flag to avoid recall of xlator_mem_cleanup for xame xlator */
887953
         uint32_t call_cleanup;
887953
 
887953
+        /* Variable to save fd_count for detach brick */
887953
+        gf_atomic_t fd_cnt;
887953
 
887953
+        /* Variable to save xprt associated for detach brick */
887953
+        gf_atomic_t xprtrefcnt;
887953
+
887953
+        /* Flag to notify got CHILD_DOWN event for detach brick */
887953
+        uint32_t notify_down;
887953
 };
887953
 
887953
 typedef struct {
887953
diff --git a/xlators/features/index/src/index.c b/xlators/features/index/src/index.c
887953
index f3b0270..bf3f4dd 100644
887953
--- a/xlators/features/index/src/index.c
887953
+++ b/xlators/features/index/src/index.c
887953
@@ -188,6 +188,7 @@ worker_enqueue (xlator_t *this, call_stub_t *stub)
887953
         pthread_mutex_lock (&priv->mutex);
887953
         {
887953
                 __index_enqueue (&priv->callstubs, stub);
887953
+                GF_ATOMIC_INC(priv->stub_cnt);
887953
                 pthread_cond_signal (&priv->cond);
887953
         }
887953
         pthread_mutex_unlock (&priv->mutex);
887953
@@ -223,11 +224,18 @@ index_worker (void *data)
887953
                         }
887953
                         if (!bye)
887953
                                 stub = __index_dequeue (&priv->callstubs);
887953
+                        if (bye) {
887953
+                                priv->curr_count--;
887953
+                                if (priv->curr_count == 0)
887953
+                                        pthread_cond_broadcast(&priv->cond);
887953
+                        }
887953
                 }
887953
                 pthread_mutex_unlock (&priv->mutex);
887953
 
887953
-                if (stub) /* guard against spurious wakeups */
887953
+                if (stub) {/* guard against spurious wakeups */
887953
                         call_resume (stub);
887953
+                        GF_ATOMIC_DEC(priv->stub_cnt);
887953
+                }
887953
                 stub = NULL;
887953
                 if (bye)
887953
                         break;
887953
@@ -2375,6 +2383,7 @@ init (xlator_t *this)
887953
                 gf_uuid_generate (priv->internal_vgfid[i]);
887953
 
887953
         INIT_LIST_HEAD (&priv->callstubs);
887953
+        GF_ATOMIC_INIT(priv->stub_cnt, 0);
887953
 
887953
         this->local_pool = mem_pool_new (index_local_t, 64);
887953
         if (!this->local_pool) {
887953
@@ -2403,6 +2412,7 @@ init (xlator_t *this)
887953
         index_set_link_count (priv, count, XATTROP);
887953
         priv->down = _gf_false;
887953
 
887953
+        priv->curr_count = 0;
887953
         ret = gf_thread_create (&priv->thread, &w_attr, index_worker, this,
887953
                                 "idxwrker");
887953
         if (ret) {
887953
@@ -2411,7 +2421,7 @@ init (xlator_t *this)
887953
                         "Failed to create worker thread, aborting");
887953
                 goto out;
887953
         }
887953
-
887953
+        priv->curr_count++;
887953
         ret = 0;
887953
 out:
887953
         if (ret) {
887953
@@ -2528,6 +2538,9 @@ notify (xlator_t *this, int event, void *data, ...)
887953
 {
887953
         int     ret = 0;
887953
         index_priv_t *priv = NULL;
887953
+        uint64_t stub_cnt = 0;
887953
+        xlator_t *victim = data;
887953
+        struct timespec sleep_till = {0,};
887953
 
887953
         if (!this)
887953
                 return 0;
887953
@@ -2536,6 +2549,39 @@ notify (xlator_t *this, int event, void *data, ...)
887953
         if (!priv)
887953
                 return 0;
887953
 
887953
+        if ((event == GF_EVENT_PARENT_DOWN) && victim->cleanup_starting) {
887953
+                stub_cnt = GF_ATOMIC_GET(priv->stub_cnt);
887953
+                clock_gettime(CLOCK_REALTIME, &sleep_till);
887953
+                sleep_till.tv_sec += 1;
887953
+
887953
+                /* Wait for stubs to drain from the queue before notifying PARENT_DOWN */
887953
+                pthread_mutex_lock(&priv->mutex);
887953
+                {
887953
+                        while (stub_cnt) {
887953
+                                (void)pthread_cond_timedwait(&priv->cond, &priv->mutex,
887953
+                                                             &sleep_till);
887953
+                                stub_cnt = GF_ATOMIC_GET(priv->stub_cnt);
887953
+                        }
887953
+                }
887953
+                pthread_mutex_unlock(&priv->mutex);
887953
+                gf_log(this->name, GF_LOG_INFO,
887953
+                       "Notify GF_EVENT_PARENT_DOWN for brick %s", victim->name);
887953
+        }
887953
+
887953
+        if ((event == GF_EVENT_CHILD_DOWN) && victim->cleanup_starting) {
887953
+                pthread_mutex_lock(&priv->mutex);
887953
+                {
887953
+                        priv->down = _gf_true;
887953
+                        pthread_cond_broadcast(&priv->cond);
887953
+                        while (priv->curr_count)
887953
+                                pthread_cond_wait(&priv->cond, &priv->mutex);
887953
+                }
887953
+                pthread_mutex_unlock(&priv->mutex);
887953
+
887953
+                gf_log(this->name, GF_LOG_INFO,
887953
+                       "Notify GF_EVENT_CHILD_DOWN for brick %s", victim->name);
887953
+        }
887953
+
887953
         ret = default_notify (this, event, data);
887953
         return ret;
887953
 }
887953
diff --git a/xlators/features/index/src/index.h b/xlators/features/index/src/index.h
887953
index ae9091d..d935294 100644
887953
--- a/xlators/features/index/src/index.h
887953
+++ b/xlators/features/index/src/index.h
887953
@@ -62,6 +62,8 @@ typedef struct index_priv {
887953
         int64_t  pending_count;
887953
         pthread_t thread;
887953
         gf_boolean_t down;
887953
+        gf_atomic_t stub_cnt;
887953
+        int32_t curr_count;
887953
 } index_priv_t;
887953
 
887953
 typedef struct index_local {
887953
diff --git a/xlators/performance/io-threads/src/io-threads.c b/xlators/performance/io-threads/src/io-threads.c
887953
index 1e1816a..5c47072 100644
887953
--- a/xlators/performance/io-threads/src/io-threads.c
887953
+++ b/xlators/performance/io-threads/src/io-threads.c
887953
@@ -120,7 +120,7 @@ __iot_dequeue (iot_conf_t *conf, int *pri)
887953
         if (!stub)
887953
                 return NULL;
887953
 
887953
-        conf->queue_size--;
887953
+        GF_ATOMIC_DEC(conf->queue_size);
887953
         conf->queue_sizes[*pri]--;
887953
 
887953
         return stub;
887953
@@ -153,7 +153,7 @@ __iot_enqueue (iot_conf_t *conf, call_stub_t *stub, int pri)
887953
         }
887953
         list_add_tail (&stub->list, &ctx->reqs);
887953
 
887953
-        conf->queue_size++;
887953
+        GF_ATOMIC_INC(conf->queue_size);
887953
         conf->queue_sizes[pri]++;
887953
 }
887953
 
887953
@@ -182,7 +182,7 @@ iot_worker (void *data)
887953
                                 conf->ac_iot_count[pri]--;
887953
                                 pri = -1;
887953
                         }
887953
-                        while (conf->queue_size == 0) {
887953
+                        while (GF_ATOMIC_GET(conf->queue_size) == 0) {
887953
                                 if (conf->down) {
887953
                                         bye = _gf_true;/*Avoid sleep*/
887953
                                         break;
887953
@@ -816,7 +816,7 @@ __iot_workers_scale (iot_conf_t *conf)
887953
                         gf_msg_debug (conf->this->name, 0,
887953
                                       "scaled threads to %d (queue_size=%d/%d)",
887953
                                       conf->curr_count,
887953
-                                      conf->queue_size, scale);
887953
+                                      GF_ATOMIC_GET(conf->queue_size), scale);
887953
                 } else {
887953
                         break;
887953
                 }
887953
@@ -1030,6 +1030,7 @@ init (xlator_t *this)
887953
                         bool, out);
887953
 
887953
         conf->this = this;
887953
+        GF_ATOMIC_INIT(conf->queue_size, 0);
887953
 
887953
         for (i = 0; i < IOT_PRI_MAX; i++) {
887953
                 INIT_LIST_HEAD (&conf->clients[i]);
887953
@@ -1073,9 +1074,41 @@ int
887953
 notify (xlator_t *this, int32_t event, void *data, ...)
887953
 {
887953
         iot_conf_t *conf = this->private;
887953
+        xlator_t *victim = data;
887953
+        uint64_t queue_size = 0;
887953
+        struct timespec sleep_till = {0, };
887953
+
887953
+        if (GF_EVENT_PARENT_DOWN == event) {
887953
+                if (victim->cleanup_starting) {
887953
+                        clock_gettime(CLOCK_REALTIME, &sleep_till);
887953
+                        sleep_till.tv_sec += 1;
887953
+                        /* Wait for stubs to drain from the queue before notifying PARENT_DOWN */
887953
+                        queue_size = GF_ATOMIC_GET(conf->queue_size);
887953
+
887953
+                        pthread_mutex_lock(&conf->mutex);
887953
+                        {
887953
+                                while (queue_size) {
887953
+                                        (void)pthread_cond_timedwait(&conf->cond, &conf->mutex,
887953
+                                                                     &sleep_till);
887953
+                                        queue_size = GF_ATOMIC_GET(conf->queue_size);
887953
+                                }
887953
+                        }
887953
+                        pthread_mutex_unlock(&conf->mutex);
887953
 
887953
-        if (GF_EVENT_PARENT_DOWN == event)
887953
-                iot_exit_threads (conf);
887953
+                        gf_log(this->name, GF_LOG_INFO,
887953
+                               "Notify GF_EVENT_PARENT_DOWN for brick %s", victim->name);
887953
+                } else {
887953
+                        iot_exit_threads(conf);
887953
+                }
887953
+        }
887953
+
887953
+        if (GF_EVENT_CHILD_DOWN == event) {
887953
+                if (victim->cleanup_starting) {
887953
+                        iot_exit_threads(conf);
887953
+                        gf_log(this->name, GF_LOG_INFO,
887953
+                               "Notify GF_EVENT_CHILD_DOWN for brick %s", victim->name);
887953
+                }
887953
+        }
887953
 
887953
         default_notify (this, event, data);
887953
 
887953
diff --git a/xlators/performance/io-threads/src/io-threads.h b/xlators/performance/io-threads/src/io-threads.h
887953
index 9648f74..7a6973c 100644
887953
--- a/xlators/performance/io-threads/src/io-threads.h
887953
+++ b/xlators/performance/io-threads/src/io-threads.h
887953
@@ -75,7 +75,7 @@ struct iot_conf {
887953
         int32_t              ac_iot_limit[IOT_PRI_MAX];
887953
         int32_t              ac_iot_count[IOT_PRI_MAX];
887953
         int                  queue_sizes[IOT_PRI_MAX];
887953
-        int                  queue_size;
887953
+        gf_atomic_t          queue_size;
887953
         pthread_attr_t       w_attr;
887953
         gf_boolean_t         least_priority; /*Enable/Disable least-priority */
887953
 
887953
diff --git a/xlators/protocol/server/src/server-handshake.c b/xlators/protocol/server/src/server-handshake.c
887953
index 12f620c..75577fa 100644
887953
--- a/xlators/protocol/server/src/server-handshake.c
887953
+++ b/xlators/protocol/server/src/server-handshake.c
887953
@@ -576,6 +576,7 @@ server_setvolume (rpcsvc_request_t *req)
887953
                 goto fail;
887953
         }
887953
 
887953
+        pthread_mutex_lock(&conf->mutex);
887953
         list_for_each_entry (tmp, &conf->child_status->status_list,
887953
                                                                   status_list) {
887953
                 if (strcmp (tmp->name, name) == 0)
887953
@@ -583,7 +584,7 @@ server_setvolume (rpcsvc_request_t *req)
887953
         }
887953
 
887953
         if (!tmp->name) {
887953
-                gf_msg (this->name, GF_LOG_ERROR, 0,
887953
+                gf_msg (this->name, GF_LOG_INFO, 0,
887953
                         PS_MSG_CHILD_STATUS_FAILED,
887953
                         "No xlator %s is found in "
887953
                         "child status list", name);
887953
@@ -594,7 +595,21 @@ server_setvolume (rpcsvc_request_t *req)
887953
                                 PS_MSG_DICT_GET_FAILED,
887953
                                 "Failed to set 'child_up' for xlator %s "
887953
                                 "in the reply dict", tmp->name);
887953
+                if (!tmp->child_up) {
887953
+                        ret = dict_set_str(reply, "ERROR",
887953
+                                           "Not received child_up for this xlator");
887953
+                        if (ret < 0)
887953
+                                gf_msg_debug(this->name, 0, "failed to set error msg");
887953
+
887953
+                        gf_msg(this->name, GF_LOG_ERROR, 0, PS_MSG_CHILD_STATUS_FAILED,
887953
+                               "Not received child_up for this xlator %s", name);
887953
+                        op_ret = -1;
887953
+                        op_errno = EAGAIN;
887953
+                        pthread_mutex_unlock(&conf->mutex);
887953
+                        goto fail;
887953
+                }
887953
         }
887953
+        pthread_mutex_unlock(&conf->mutex);
887953
 
887953
         ret = dict_get_str (params, "process-uuid", &client_uid);
887953
         if (ret < 0) {
887953
@@ -666,7 +681,7 @@ server_setvolume (rpcsvc_request_t *req)
887953
         if (serv_ctx->lk_version != 0 &&
887953
             serv_ctx->lk_version != lk_version) {
887953
                 (void) server_connection_cleanup (this, client,
887953
-                                                  INTERNAL_LOCKS | POSIX_LOCKS);
887953
+                                                  INTERNAL_LOCKS | POSIX_LOCKS, NULL);
887953
         }
887953
 
887953
         pthread_mutex_lock (&conf->mutex);
887953
@@ -812,9 +827,9 @@ server_setvolume (rpcsvc_request_t *req)
887953
                 req->trans->clnt_options = dict_ref(params);
887953
 
887953
                 gf_msg (this->name, GF_LOG_INFO, 0, PS_MSG_CLIENT_ACCEPTED,
887953
-                        "accepted client from %s (version: %s)",
887953
+                        "accepted client from %s (version: %s) with subvol %s",
887953
                         client->client_uid,
887953
-                        (clnt_version) ? clnt_version : "old");
887953
+                        (clnt_version) ? clnt_version : "old", name);
887953
 
887953
                 gf_event (EVENT_CLIENT_CONNECT, "client_uid=%s;"
887953
                           "client_identifier=%s;server_identifier=%s;"
887953
diff --git a/xlators/protocol/server/src/server-helpers.c b/xlators/protocol/server/src/server-helpers.c
887953
index c492ab1..99256bf 100644
887953
--- a/xlators/protocol/server/src/server-helpers.c
887953
+++ b/xlators/protocol/server/src/server-helpers.c
887953
@@ -242,16 +242,51 @@ server_connection_cleanup_flush_cbk (call_frame_t *frame, void *cookie,
887953
         int32_t    ret    = -1;
887953
         fd_t      *fd     = NULL;
887953
         client_t  *client = NULL;
887953
+        uint64_t fd_cnt = 0;
887953
+        xlator_t *victim = NULL;
887953
+        server_conf_t *conf = NULL;
887953
+        xlator_t *serv_xl = NULL;
887953
+        rpc_transport_t *xprt = NULL;
887953
+        rpc_transport_t *xp_next = NULL;
887953
+        int32_t detach = (long)cookie;
887953
+        gf_boolean_t xprt_found = _gf_false;
887953
 
887953
         GF_VALIDATE_OR_GOTO ("server", this, out);
887953
         GF_VALIDATE_OR_GOTO ("server", frame, out);
887953
 
887953
         fd = frame->local;
887953
         client = frame->root->client;
887953
+        serv_xl = frame->this;
887953
+        conf = serv_xl->private;
887953
 
887953
         fd_unref (fd);
887953
         frame->local = NULL;
887953
 
887953
+        if (client)
887953
+                victim = client->bound_xl;
887953
+
887953
+        if (victim) {
887953
+                fd_cnt = GF_ATOMIC_DEC(victim->fd_cnt);
887953
+                if (!fd_cnt && conf && detach) {
887953
+                        pthread_mutex_lock(&conf->mutex);
887953
+                        {
887953
+                        list_for_each_entry_safe(xprt, xp_next, &conf->xprt_list, list)
887953
+                        {
887953
+                                if (!xprt->xl_private)
887953
+                                        continue;
887953
+                                if (xprt->xl_private == client) {
887953
+                                        xprt_found = _gf_true;
887953
+                                        break;
887953
+                                }
887953
+                        }
887953
+                        }
887953
+                        pthread_mutex_unlock(&conf->mutex);
887953
+                        if (xprt_found) {
887953
+                                rpc_transport_unref(xprt);
887953
+                        }
887953
+                }
887953
+        }
887953
+
887953
         gf_client_unref (client);
887953
         STACK_DESTROY (frame->root);
887953
 
887953
@@ -262,7 +297,7 @@ out:
887953
 
887953
 
887953
 static int
887953
-do_fd_cleanup (xlator_t *this, client_t* client, fdentry_t *fdentries, int fd_count)
887953
+do_fd_cleanup (xlator_t *this, client_t *client, fdentry_t *fdentries, int fd_count, int32_t detach)
887953
 {
887953
         fd_t               *fd = NULL;
887953
         int                 i = 0, ret = -1;
887953
@@ -307,9 +342,10 @@ do_fd_cleanup (xlator_t *this, client_t* client, fdentry_t *fdentries, int fd_co
887953
                         memset (&tmp_frame->root->lk_owner, 0,
887953
                                 sizeof (gf_lkowner_t));
887953
 
887953
-                        STACK_WIND (tmp_frame,
887953
-                                    server_connection_cleanup_flush_cbk,
887953
-                                    bound_xl, bound_xl->fops->flush, fd, NULL);
887953
+                        STACK_WIND_COOKIE (tmp_frame,
887953
+                                           server_connection_cleanup_flush_cbk,
887953
+                                           (void *)(long)detach, bound_xl,
887953
+                                           bound_xl->fops->flush, fd, NULL);
887953
                 }
887953
         }
887953
 
887953
@@ -323,13 +359,18 @@ out:
887953
 
887953
 int
887953
 server_connection_cleanup (xlator_t *this, client_t *client,
887953
-                           int32_t flags)
887953
+                           int32_t flags, gf_boolean_t *fd_exist)
887953
 {
887953
         server_ctx_t        *serv_ctx  = NULL;
887953
         fdentry_t           *fdentries = NULL;
887953
         uint32_t             fd_count  = 0;
887953
         int                  cd_ret    = 0;
887953
         int                  ret       = 0;
887953
+        xlator_t            *bound_xl = NULL;
887953
+        int                  i = 0;
887953
+        fd_t                *fd = NULL;
887953
+        uint64_t             fd_cnt = 0;
887953
+        int32_t              detach = 0;
887953
 
887953
         GF_VALIDATE_OR_GOTO (this->name, this, out);
887953
         GF_VALIDATE_OR_GOTO (this->name, client, out);
887953
@@ -360,13 +401,35 @@ server_connection_cleanup (xlator_t *this, client_t *client,
887953
         }
887953
 
887953
         if (fdentries != NULL) {
887953
+                /* Loop to configure fd_count on victim brick */
887953
+                bound_xl = client->bound_xl;
887953
+                if (bound_xl) {
887953
+                        for (i = 0; i < fd_count; i++) {
887953
+                                fd = fdentries[i].fd;
887953
+                                if (!fd)
887953
+                                        continue;
887953
+                                fd_cnt++;
887953
+                        }
887953
+                        if (fd_cnt) {
887953
+                                if (fd_exist)
887953
+                                        (*fd_exist) = _gf_true;
887953
+                                GF_ATOMIC_ADD(bound_xl->fd_cnt, fd_cnt);
887953
+                        }
887953
+                }
887953
+
887953
+                /* If fd_exist is not NULL it means the function was invoked
887953
+                  by server_rpc_notify at the time of getting DISCONNECT
887953
+                  notification
887953
+                */
887953
+                if (fd_exist)
887953
+                        detach = 1;
887953
                 gf_msg_debug (this->name, 0, "Performing cleanup on %d "
887953
                               "fdentries", fd_count);
887953
-                ret = do_fd_cleanup (this, client, fdentries, fd_count);
887953
-        }
887953
-        else
887953
+                ret = do_fd_cleanup (this, client, fdentries, fd_count, detach);
887953
+        } else {
887953
                 gf_msg (this->name, GF_LOG_INFO, 0, PS_MSG_FDENTRY_NULL,
887953
                         "no fdentries to clean");
887953
+        }
887953
 
887953
         if (cd_ret || ret)
887953
                 ret = -1;
887953
diff --git a/xlators/protocol/server/src/server-helpers.h b/xlators/protocol/server/src/server-helpers.h
887953
index 1f47bc9..84a0cf4 100644
887953
--- a/xlators/protocol/server/src/server-helpers.h
887953
+++ b/xlators/protocol/server/src/server-helpers.h
887953
@@ -42,7 +42,7 @@ get_frame_from_request (rpcsvc_request_t *req);
887953
 
887953
 int
887953
 server_connection_cleanup (xlator_t *this, struct _client *client,
887953
-                           int32_t flags);
887953
+                           int32_t flags, gf_boolean_t *fd_exist);
887953
 
887953
 gf_boolean_t
887953
 server_cancel_grace_timer (xlator_t *this, struct _client *client);
887953
diff --git a/xlators/protocol/server/src/server.c b/xlators/protocol/server/src/server.c
887953
index 69ad184..8ac0bd1 100644
887953
--- a/xlators/protocol/server/src/server.c
887953
+++ b/xlators/protocol/server/src/server.c
887953
@@ -79,7 +79,7 @@ grace_time_handler (void *data)
887953
 
887953
                 if (detached) /* reconnection did not happen :-( */
887953
                         server_connection_cleanup (this, client,
887953
-                                                   INTERNAL_LOCKS | POSIX_LOCKS);
887953
+                                                   INTERNAL_LOCKS | POSIX_LOCKS, NULL);
887953
                 gf_client_unref (client);
887953
         }
887953
 out:
887953
@@ -195,7 +195,7 @@ server_submit_reply (call_frame_t *frame, rpcsvc_request_t *req, void *arg,
887953
                                   "Reply submission failed");
887953
                 if (frame && client && !lk_heal) {
887953
                         server_connection_cleanup (frame->this, client,
887953
-                                                  INTERNAL_LOCKS | POSIX_LOCKS);
887953
+                                                  INTERNAL_LOCKS | POSIX_LOCKS, NULL);
887953
                 } else {
887953
                         gf_msg_callingfn ("", GF_LOG_ERROR, 0,
887953
                                           PS_MSG_REPLY_SUBMIT_FAILED,
887953
@@ -466,6 +466,33 @@ out:
887953
         return error;
887953
 }
887953
 
887953
+void
887953
+server_call_xlator_mem_cleanup(xlator_t *this, char *victim_name)
887953
+{
887953
+        pthread_t th_id = { 0, };
887953
+        int th_ret = -1;
887953
+        server_cleanup_xprt_arg_t *arg = NULL;
887953
+
887953
+        if (!victim_name)
887953
+                return;
887953
+
887953
+        gf_log(this->name, GF_LOG_INFO, "Create graph janitor thread for brick %s",
887953
+               victim_name);
887953
+
887953
+        arg = calloc(1, sizeof(*arg));
887953
+        arg->this = this;
887953
+        arg->victim_name = gf_strdup(victim_name);
887953
+        th_ret = gf_thread_create_detached(&th_id, server_graph_janitor_threads,
887953
+                                           arg, "graphjanitor");
887953
+        if (th_ret) {
887953
+                gf_log(this->name, GF_LOG_ERROR,
887953
+                       "graph janitor Thread"
887953
+                       " creation is failed for brick %s",
887953
+                       victim_name);
887953
+                GF_FREE(arg->victim_name);
887953
+                free(arg);
887953
+        }
887953
+}
887953
 
887953
 int
887953
 server_rpc_notify (rpcsvc_t *rpc, void *xl, rpcsvc_event_t event,
887953
@@ -480,14 +507,9 @@ server_rpc_notify (rpcsvc_t *rpc, void *xl, rpcsvc_event_t event,
887953
         struct timespec     grace_ts    = {0, };
887953
         char                *auth_path  = NULL;
887953
         int                  ret        = -1;
887953
-        gf_boolean_t         victim_found = _gf_false;
887953
         char                *xlator_name  = NULL;
887953
-        glusterfs_ctx_t     *ctx          = NULL;
887953
-        xlator_t            *top          = NULL;
887953
-        xlator_list_t      **trav_p       = NULL;
887953
-        xlator_t            *travxl       = NULL;
887953
         uint64_t             xprtrefcount = 0;
887953
-        struct _child_status *tmp         = NULL;
887953
+        gf_boolean_t         fd_exist = _gf_false;
887953
 
887953
 
887953
         if (!xl || !data) {
887953
@@ -500,7 +522,6 @@ server_rpc_notify (rpcsvc_t *rpc, void *xl, rpcsvc_event_t event,
887953
         this = xl;
887953
         trans = data;
887953
         conf = this->private;
887953
-        ctx = glusterfsd_ctx;
887953
 
887953
         switch (event) {
887953
         case RPCSVC_EVENT_ACCEPT:
887953
@@ -538,7 +559,8 @@ server_rpc_notify (rpcsvc_t *rpc, void *xl, rpcsvc_event_t event,
887953
                  */
887953
                 pthread_mutex_lock (&conf->mutex);
887953
                 client = trans->xl_private;
887953
-                list_del_init (&trans->list);
887953
+                if (!client)
887953
+                        list_del_init (&trans->list);
887953
                 pthread_mutex_unlock (&conf->mutex);
887953
 
887953
                 if (!client)
887953
@@ -563,7 +585,7 @@ server_rpc_notify (rpcsvc_t *rpc, void *xl, rpcsvc_event_t event,
887953
                         gf_client_put (client, &detached);
887953
                         if (detached) {
887953
                                 server_connection_cleanup (this, client,
887953
-                                                           INTERNAL_LOCKS | POSIX_LOCKS);
887953
+                                                           INTERNAL_LOCKS | POSIX_LOCKS, &fd_exist);
887953
 
887953
                                 gf_event (EVENT_CLIENT_DISCONNECT,
887953
                                           "client_uid=%s;"
887953
@@ -638,56 +660,38 @@ server_rpc_notify (rpcsvc_t *rpc, void *xl, rpcsvc_event_t event,
887953
 unref_transport:
887953
                 /* rpc_transport_unref() causes a RPCSVC_EVENT_TRANSPORT_DESTROY
887953
                  * to be called in blocking manner
887953
-                 * So no code should ideally be after this unref
887953
+                 * So no code should ideally be after this unref. Call rpc_transport_unref
887953
+                 * only when no client exists, the client is not detached, or no fd is
887953
+                 * associated with the client
887953
                  */
887953
-                rpc_transport_unref (trans);
887953
+                if (!client || !detached || !fd_exist)
887953
+                        rpc_transport_unref (trans);
887953
 
887953
                 break;
887953
 
887953
         case RPCSVC_EVENT_TRANSPORT_DESTROY:
887953
+                pthread_mutex_lock(&conf->mutex);
887953
                 client = trans->xl_private;
887953
+                list_del_init(&trans->list);
887953
+                pthread_mutex_unlock(&conf->mutex);
887953
                 if (!client)
887953
                         break;
887953
-                pthread_mutex_lock (&conf->mutex);
887953
-                list_for_each_entry (tmp, &conf->child_status->status_list,
887953
-                                     status_list) {
887953
-                        if (tmp->name && client->bound_xl &&
887953
-                            client->bound_xl->cleanup_starting &&
887953
-                            !strcmp (tmp->name, client->bound_xl->name)) {
887953
-                                xprtrefcount = GF_ATOMIC_GET (tmp->xprtrefcnt);
887953
-                                if (xprtrefcount > 0) {
887953
-                                        xprtrefcount = GF_ATOMIC_DEC (tmp->xprtrefcnt);
887953
-                                        if (xprtrefcount == 0)
887953
-                                                xlator_name = gf_strdup(client->bound_xl->name);
887953
-                                }
887953
-                                break;
887953
+
887953
+                if (client->bound_xl && client->bound_xl->cleanup_starting) {
887953
+                        xprtrefcount = GF_ATOMIC_GET (client->bound_xl->xprtrefcnt);
887953
+                        if (xprtrefcount > 0) {
887953
+                                xprtrefcount = GF_ATOMIC_DEC (client->bound_xl->xprtrefcnt);
887953
+                                if (xprtrefcount == 0)
887953
+                                        xlator_name = gf_strdup(client->bound_xl->name);
887953
                         }
887953
                 }
887953
-                pthread_mutex_unlock (&conf->mutex);
887953
 
887953
                 /* unref only for if (!client->lk_heal) */
887953
                 if (!conf->lk_heal)
887953
                         gf_client_unref (client);
887953
 
887953
                 if (xlator_name) {
887953
-                        if (this->ctx->active) {
887953
-                                top = this->ctx->active->first;
887953
-                                LOCK (&ctx->volfile_lock);
887953
-                                for (trav_p = &top->children; *trav_p;
887953
-                                                   trav_p = &(*trav_p)->next) {
887953
-                                        travxl = (*trav_p)->xlator;
887953
-                                        if (!travxl->call_cleanup &&
887953
-                                            strcmp (travxl->name, xlator_name) == 0) {
887953
-                                                victim_found = _gf_true;
887953
-                                                break;
887953
-                                        }
887953
-                                }
887953
-                                UNLOCK (&ctx->volfile_lock);
887953
-                                if (victim_found) {
887953
-                                        xlator_mem_cleanup (travxl);
887953
-                                        glusterfs_autoscale_threads (ctx, -1);
887953
-                                }
887953
-                        }
887953
+                        server_call_xlator_mem_cleanup (this, xlator_name);
887953
                         GF_FREE (xlator_name);
887953
                 }
887953
 
887953
@@ -701,6 +705,67 @@ out:
887953
         return 0;
887953
 }
887953
 
887953
+void *
887953
+server_graph_janitor_threads(void *data)
887953
+{
887953
+        xlator_t *victim = NULL;
887953
+        xlator_t *this = NULL;
887953
+        server_conf_t *conf = NULL;
887953
+        glusterfs_ctx_t *ctx = NULL;
887953
+        char *victim_name = NULL;
887953
+        server_cleanup_xprt_arg_t *arg = NULL;
887953
+        gf_boolean_t victim_found = _gf_false;
887953
+        xlator_list_t **trav_p = NULL;
887953
+        xlator_t *top = NULL;
887953
+
887953
+        GF_ASSERT(data);
887953
+
887953
+        arg = data;
887953
+        this = arg->this;
887953
+        victim_name = arg->victim_name;
887953
+        THIS = arg->this;
887953
+        conf = this->private;
887953
+
887953
+        ctx = THIS->ctx;
887953
+        GF_VALIDATE_OR_GOTO(this->name, ctx, out);
887953
+
887953
+        top = this->ctx->active->first;
887953
+        LOCK(&ctx->volfile_lock);
887953
+        for (trav_p = &top->children; *trav_p; trav_p = &(*trav_p)->next) {
887953
+                victim = (*trav_p)->xlator;
887953
+                if (victim->cleanup_starting &&
887953
+                    strcmp(victim->name, victim_name) == 0) {
887953
+                        victim_found = _gf_true;
887953
+                        break;
887953
+                }
887953
+        }
887953
+        if (victim_found)
887953
+                glusterfs_delete_volfile_checksum(ctx, victim->volfile_id);
887953
+        UNLOCK(&ctx->volfile_lock);
887953
+        if (!victim_found) {
887953
+                gf_log(this->name, GF_LOG_ERROR,
887953
+                       "victim brick %s is not"
887953
+                       " found in graph",
887953
+                       victim_name);
887953
+                goto out;
887953
+        }
887953
+
887953
+        default_notify(victim, GF_EVENT_PARENT_DOWN, victim);
887953
+        if (victim->notify_down) {
887953
+                gf_log(THIS->name, GF_LOG_INFO,
887953
+                       "Start call fini for brick"
887953
+                       " %s stack",
887953
+                       victim->name);
887953
+                xlator_mem_cleanup(victim);
887953
+                glusterfs_autoscale_threads(ctx, -1);
887953
+        }
887953
+
887953
+out:
887953
+        GF_FREE(arg->victim_name);
887953
+        free(arg);
887953
+        return NULL;
887953
+}
887953
+
887953
 int32_t
887953
 mem_acct_init (xlator_t *this)
887953
 {
887953
@@ -1136,13 +1201,7 @@ init (xlator_t *this)
887953
         conf->child_status = GF_CALLOC (1, sizeof (struct _child_status),
887953
                                           gf_server_mt_child_status);
887953
         INIT_LIST_HEAD (&conf->child_status->status_list);
887953
-        GF_ATOMIC_INIT (conf->child_status->xprtrefcnt, 0);
887953
 
887953
-        /*ret = dict_get_str (this->options, "statedump-path", &statedump_path);
887953
-        if (!ret) {
887953
-                gf_path_strip_trailing_slashes (statedump_path);
887953
-                this->ctx->statedump_path = statedump_path;
887953
-        }*/
887953
         GF_OPTION_INIT ("statedump-path", statedump_path, path, out);
887953
         if (statedump_path) {
887953
                 gf_path_strip_trailing_slashes (statedump_path);
887953
@@ -1589,6 +1648,11 @@ notify (xlator_t *this, int32_t event, void *data, ...)
887953
 
887953
         case GF_EVENT_CHILD_DOWN:
887953
         {
887953
+                if (victim->cleanup_starting) {
887953
+                        victim->notify_down = 1;
887953
+                        gf_log(this->name, GF_LOG_INFO,
887953
+                               "Getting CHILD_DOWN event for brick %s", victim->name);
887953
+                }
887953
                 ret = server_process_child_event (this, event, data,
887953
                                                   GF_CBK_CHILD_DOWN);
887953
                 if (ret) {
887953
@@ -1622,7 +1686,7 @@ notify (xlator_t *this, int32_t event, void *data, ...)
887953
                                      status_list) {
887953
                         if (strcmp (tmp->name, victim->name) == 0) {
887953
                                 tmp->child_up = _gf_false;
887953
-                                GF_ATOMIC_INIT (tmp->xprtrefcnt, totxprt);
887953
+                                GF_ATOMIC_INIT (victim->xprtrefcnt, totxprt);
887953
                                 break;
887953
                         }
887953
                 }
887953
@@ -1668,8 +1732,7 @@ notify (xlator_t *this, int32_t event, void *data, ...)
887953
                         glusterfs_mgmt_pmap_signout (ctx,
887953
                                                      victim->name);
887953
                         if (!xprt_found && victim_found) {
887953
-                                xlator_mem_cleanup (victim);
887953
-                                glusterfs_autoscale_threads (ctx, -1);
887953
+                                server_call_xlator_mem_cleanup (this, victim);
887953
                         }
887953
                 }
887953
                 break;
887953
diff --git a/xlators/protocol/server/src/server.h b/xlators/protocol/server/src/server.h
887953
index 691c75b..23775d4 100644
887953
--- a/xlators/protocol/server/src/server.h
887953
+++ b/xlators/protocol/server/src/server.h
887953
@@ -78,7 +78,6 @@ struct _child_status {
887953
         struct list_head status_list;
887953
         char *name;
887953
         gf_boolean_t child_up;
887953
-        gf_atomic_t  xprtrefcnt;
887953
 };
887953
 struct server_conf {
887953
         rpcsvc_t               *rpc;
887953
@@ -222,6 +221,10 @@ typedef struct _server_ctx {
887953
         uint32_t             lk_version;
887953
 } server_ctx_t;
887953
 
887953
+typedef struct server_cleanup_xprt_arg {
887953
+    xlator_t *this;
887953
+    char *victim_name;
887953
+} server_cleanup_xprt_arg_t;
887953
 
887953
 int
887953
 server_submit_reply (call_frame_t *frame, rpcsvc_request_t *req, void *arg,
887953
@@ -246,4 +249,8 @@ serialize_rsp_direntp (gf_dirent_t *entries, gfs3_readdirp_rsp *rsp);
887953
 
887953
 server_ctx_t*
887953
 server_ctx_get (client_t *client, xlator_t *xlator);
887953
+
887953
+void *
887953
+server_graph_janitor_threads(void *);
887953
+
887953
 #endif /* !_SERVER_H */
887953
diff --git a/xlators/storage/posix/src/posix.c b/xlators/storage/posix/src/posix.c
887953
index 1d3f1ee..f79dbda 100644
887953
--- a/xlators/storage/posix/src/posix.c
887953
+++ b/xlators/storage/posix/src/posix.c
887953
@@ -7113,6 +7113,8 @@ notify (xlator_t *this,
887953
         void *data,
887953
         ...)
887953
 {
887953
+        xlator_t *victim = data;
887953
+
887953
         switch (event)
887953
         {
887953
         case GF_EVENT_PARENT_UP:
887953
@@ -7121,6 +7123,15 @@ notify (xlator_t *this,
887953
                 default_notify (this, GF_EVENT_CHILD_UP, data);
887953
         }
887953
         break;
887953
+        case GF_EVENT_PARENT_DOWN:
887953
+        {
887953
+                if (!victim->cleanup_starting)
887953
+                        break;
887953
+                gf_log(this->name, GF_LOG_INFO, "Sending CHILD_DOWN for brick %s",
887953
+                       victim->name);
887953
+                default_notify(this->parents->xlator, GF_EVENT_CHILD_DOWN, data);
887953
+        }
887953
+        break;
887953
         default:
887953
                 /* */
887953
                 break;
887953
-- 
887953
1.8.3.1
887953