Blob Blame History Raw
From 53042d3aa2a619c0e5e56d0707ea9d503429d28a Mon Sep 17 00:00:00 2001
From: Mohit Agrawal <moagrawa@redhat.com>
Date: Mon, 12 Mar 2018 19:43:15 +0530
Subject: [PATCH 226/236] gluster: Sometimes Brick process is crashed at the
 time of stopping brick

Problem: Sometimes brick process is getting crashed at the time
         of stop brick while brick mux is enabled.

Solution: Brick process was getting crashed because of rpc connection
          was not cleaning properly while brick mux is enabled.In this patch
          after sending GF_EVENT_CLEANUP notification to xlator(server)
          waits for all rpc client connection destroy for specific xlator.Once rpc
          connections are destroyed in server_rpc_notify for all associated client
          for that brick then call xlator_mem_cleanup for for brick xlator as well as
          all child xlators.To avoid races at the time of cleanup introduce
          two new flags at each xlator cleanup_starting, call_cleanup.

> BUG: 1544090
> Signed-off-by: Mohit Agrawal <moagrawa@redhat.com>
> (cherry picked from commit 0043c63f70776444f69667a4ef9596217ecb42b7)
> (Upstream link for review https://review.gluster.org/#/c/19734/)
> Note: Run all test-cases in separate build (https://review.gluster.org/#/c/19700/)
>      with same patch after enable brick mux forcefully, all test cases are
>      passed.

BUG: 1535281
Signed-off-by: Mohit Agrawal <moagrawa@redhat.com>
Change-Id: Ic4ab9c128df282d146cf1135640281fcb31997bf
Reviewed-on: https://code.engineering.redhat.com/gerrit/136229
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
---
 glusterfsd/src/glusterfsd-mgmt.c               |  96 +++++++++++---
 libglusterfs/src/client_t.c                    |   4 +-
 libglusterfs/src/fd.c                          |   4 +-
 libglusterfs/src/graph.c                       |   6 +-
 libglusterfs/src/inode.c                       |   2 +-
 libglusterfs/src/statedump.c                   |  61 ++++++---
 libglusterfs/src/xlator.c                      |   8 +-
 libglusterfs/src/xlator.h                      |  11 +-
 tests/volume.rc                                |  31 ++++-
 xlators/features/changelog/src/changelog-rpc.c |   9 ++
 xlators/features/changelog/src/changelog.c     |   5 +
 xlators/features/trash/src/trash.c             |  18 ++-
 xlators/protocol/server/src/server-handshake.c |  25 +++-
 xlators/protocol/server/src/server.c           | 170 ++++++++++++++++++-------
 xlators/protocol/server/src/server.h           |   2 +-
 xlators/storage/posix/src/posix-helpers.c      |  14 +-
 xlators/storage/posix/src/posix.c              |   6 +-
 17 files changed, 362 insertions(+), 110 deletions(-)

diff --git a/glusterfsd/src/glusterfsd-mgmt.c b/glusterfsd/src/glusterfsd-mgmt.c
index 8f4450b..fdf403c 100644
--- a/glusterfsd/src/glusterfsd-mgmt.c
+++ b/glusterfsd/src/glusterfsd-mgmt.c
@@ -206,11 +206,6 @@ xlator_mem_free (xlator_t *xl)
         if (!xl)
                 return 0;
 
-        GF_FREE (xl->name);
-        GF_FREE (xl->type);
-        xl->name = NULL;
-        xl->type = NULL;
-
         if (xl->options) {
                 dict_ref (xl->options);
                 dict_unref (xl->options);
@@ -222,25 +217,38 @@ xlator_mem_free (xlator_t *xl)
                 GF_FREE (vol_opt);
         }
 
+        xlator_memrec_free (xl);
+
         return 0;
 }
 
 void
 xlator_call_fini (xlator_t *this) {
-        if (!this)
+        if (!this || this->cleanup_starting)
                 return;
+        this->cleanup_starting = 1;
+        this->call_cleanup = 1;
         xlator_call_fini (this->next);
         this->fini (this);
 }
 
 void
 xlator_mem_cleanup (xlator_t *this) {
-        xlator_list_t     *list = this->children;
-        xlator_t          *trav = list->xlator;
-        inode_table_t     *inode_table = NULL;
-        xlator_t          *prev = trav;
+        xlator_list_t     *list         = this->children;
+        xlator_t          *trav         = list->xlator;
+        inode_table_t     *inode_table  = NULL;
+        xlator_t          *prev         = trav;
+        glusterfs_ctx_t   *ctx          = NULL;
+        xlator_list_t    **trav_p       = NULL;
+        xlator_t          *top          = NULL;
+        xlator_t          *victim       = NULL;
 
-        inode_table = this->itable;
+
+        if (this->call_cleanup)
+                return;
+
+        this->call_cleanup = 1;
+        ctx = glusterfsd_ctx;
 
         xlator_call_fini (trav);
 
@@ -250,6 +258,7 @@ xlator_mem_cleanup (xlator_t *this) {
                 prev = trav;
         }
 
+        inode_table = this->itable;
         if (inode_table) {
                 inode_table_destroy (inode_table);
                 this->itable = NULL;
@@ -260,6 +269,33 @@ xlator_mem_cleanup (xlator_t *this) {
         }
 
         xlator_mem_free (this);
+
+        if (glusterfsd_ctx->active) {
+                top = glusterfsd_ctx->active->first;
+                LOCK (&ctx->volfile_lock);
+                /* TODO here we have leak for xlator node in a graph */
+                for (trav_p = &top->children; *trav_p; trav_p = &(*trav_p)->next) {
+                        victim = (*trav_p)->xlator;
+                        if (victim->call_cleanup && !strcmp (victim->name, this->name)) {
+                                        (*trav_p) = (*trav_p)->next;
+                                        break;
+                        }
+                }
+                /* TODO Sometime brick xlator is not moved from graph so followed below
+                   approach to move brick xlator from a graph, will move specific brick
+                   xlator from graph only while inode table and mem_acct are cleaned up
+                */
+                trav_p = &top->children;
+                while (*trav_p) {
+                        victim = (*trav_p)->xlator;
+                        if (victim->call_cleanup && !victim->itable && !victim->mem_acct) {
+                                (*trav_p) = (*trav_p)->next;
+                        } else {
+                                trav_p = &(*trav_p)->next;
+                        }
+                }
+                UNLOCK (&ctx->volfile_lock);
+        }
 }
 
 
@@ -271,8 +307,10 @@ glusterfs_handle_terminate (rpcsvc_request_t *req)
         glusterfs_ctx_t         *ctx            = NULL;
         xlator_t                *top            = NULL;
         xlator_t                *victim         = NULL;
+        xlator_t                *tvictim        = NULL;
         xlator_list_t           **trav_p        = NULL;
         gf_boolean_t            lockflag        = _gf_false;
+        gf_boolean_t            last_brick      = _gf_false;
 
         ret = xdr_to_generic (req->msg[0], &xlator_req,
                               (xdrproc_t)xdr_gd1_mgmt_brick_op_req);
@@ -290,11 +328,15 @@ glusterfs_handle_terminate (rpcsvc_request_t *req)
                         for (trav_p = &top->children; *trav_p;
                                                     trav_p = &(*trav_p)->next) {
                                 victim = (*trav_p)->xlator;
-                                if (strcmp (victim->name, xlator_req.name) == 0) {
+                                if (!victim->cleanup_starting && strcmp (victim->name, xlator_req.name) == 0) {
                                         break;
                                 }
                         }
                 }
+
+                if (!top)
+                        goto err;
+
         }
         if (!*trav_p) {
                 gf_log (THIS->name, GF_LOG_ERROR,
@@ -310,26 +352,38 @@ glusterfs_handle_terminate (rpcsvc_request_t *req)
         }
 
         glusterfs_terminate_response_send (req, 0);
-        if ((trav_p == &top->children) && !(*trav_p)->next) {
+        for (trav_p = &top->children; *trav_p; trav_p = &(*trav_p)->next) {
+                tvictim = (*trav_p)->xlator;
+                if (!tvictim->cleanup_starting && !strcmp (tvictim->name, xlator_req.name)) {
+                        continue;
+                }
+                if (!tvictim->cleanup_starting) {
+                        last_brick = _gf_true;
+                        break;
+                }
+        }
+        if (!last_brick) {
                 gf_log (THIS->name, GF_LOG_INFO,
                         "terminating after loss of last child %s",
                         xlator_req.name);
                 glusterfs_mgmt_pmap_signout (glusterfsd_ctx, xlator_req.name);
                 kill (getpid(), SIGTERM);
         } else {
-                /*
-                 * This is terribly unsafe without quiescing or shutting
-                 * things down properly but it gets us to the point
-                 * where we can test other stuff.
-                 *
-                 * TBD: finish implementing this "detach" code properly
-                 */
+                /* TODO cleanup sequence needs to be done properly for
+                   Quota and Changelog
+                */
+                if (victim->cleanup_starting)
+                        goto err;
+
+                victim->cleanup_starting = 1;
+
                 UNLOCK (&ctx->volfile_lock);
                 lockflag = _gf_true;
+
                 gf_log (THIS->name, GF_LOG_INFO, "detaching not-only"
                          " child %s", xlator_req.name);
                 top->notify (top, GF_EVENT_CLEANUP, victim);
-                xlator_mem_cleanup (victim);
+
         }
 err:
         if (!lockflag)
diff --git a/libglusterfs/src/client_t.c b/libglusterfs/src/client_t.c
index eda1c46..55d891f 100644
--- a/libglusterfs/src/client_t.c
+++ b/libglusterfs/src/client_t.c
@@ -338,7 +338,7 @@ gf_client_destroy_recursive (xlator_t *xl, client_t *client)
 {
         xlator_list_t   *trav;
 
-        if (xl->cbks->client_destroy) {
+        if (!xl->call_cleanup && xl->cbks->client_destroy) {
                 xl->cbks->client_destroy (xl, client);
         }
 
@@ -397,7 +397,7 @@ gf_client_disconnect_recursive (xlator_t *xl, client_t *client)
         int             ret     = 0;
         xlator_list_t   *trav;
 
-        if (xl->cbks->client_disconnect) {
+        if (!xl->call_cleanup && xl->cbks->client_disconnect) {
                 ret = xl->cbks->client_disconnect (xl, client);
         }
 
diff --git a/libglusterfs/src/fd.c b/libglusterfs/src/fd.c
index 45b0d32..2dc52ba 100644
--- a/libglusterfs/src/fd.c
+++ b/libglusterfs/src/fd.c
@@ -500,7 +500,7 @@ fd_destroy (fd_t *fd, gf_boolean_t bound)
                                 xl = fd->_ctx[i].xl_key;
                                 old_THIS = THIS;
                                 THIS = xl;
-                                if (xl->cbks->releasedir)
+                                if (!xl->call_cleanup && xl->cbks->releasedir)
                                         xl->cbks->releasedir (xl, fd);
                                 THIS = old_THIS;
                         }
@@ -511,7 +511,7 @@ fd_destroy (fd_t *fd, gf_boolean_t bound)
                                 xl = fd->_ctx[i].xl_key;
                                 old_THIS = THIS;
                                 THIS = xl;
-                                if (xl->cbks->release)
+                                if (!xl->call_cleanup && xl->cbks->release)
                                         xl->cbks->release (xl, fd);
                                 THIS = old_THIS;
                         }
diff --git a/libglusterfs/src/graph.c b/libglusterfs/src/graph.c
index cdd7123..c0e67e4 100644
--- a/libglusterfs/src/graph.c
+++ b/libglusterfs/src/graph.c
@@ -841,7 +841,7 @@ is_graph_topology_equal (glusterfs_graph_t *graph1, glusterfs_graph_t *graph2)
                 trav2 = trav2->children->xlator;
                 for (ltrav = trav1->children; ltrav; ltrav = ltrav->next) {
                         trav1 = ltrav->xlator;
-                        if (strcmp (trav1->name, trav2->name) == 0) {
+                        if (!trav1->cleanup_starting && !strcmp (trav1->name, trav2->name)) {
                                 break;
                         }
                 }
@@ -1088,7 +1088,7 @@ glusterfs_graph_reconfigure (glusterfs_graph_t *oldgraph,
         new_xl = FIRST_CHILD (new_xl);
 
         for (trav = old_xl->children; trav; trav = trav->next) {
-                if (strcmp (trav->xlator->name, new_xl->name) == 0) {
+                if (!trav->xlator->cleanup_starting && !strcmp (trav->xlator->name, new_xl->name)) {
                         return xlator_tree_reconfigure (trav->xlator, new_xl);
                 }
         }
@@ -1240,7 +1240,7 @@ glusterfs_graph_attach (glusterfs_graph_t *orig_graph, char *path,
                 xl->volfile_id[strlen(xl->volfile_id)-4] = '\0';
         }
 
-        /* TBD: memory leaks everywhere */
+        /* TODO memory leaks everywhere need to free graph in case of error */
         if (glusterfs_graph_prepare (graph, this->ctx, xl->name)) {
                 gf_log (this->name, GF_LOG_WARNING,
                         "failed to prepare graph for xlator %s", xl->name);
diff --git a/libglusterfs/src/inode.c b/libglusterfs/src/inode.c
index ffba1bf..cf264d8 100644
--- a/libglusterfs/src/inode.c
+++ b/libglusterfs/src/inode.c
@@ -328,7 +328,7 @@ __inode_ctx_free (inode_t *inode)
                         xl = (xlator_t *)(long)inode->_ctx[index].xl_key;
                         old_THIS = THIS;
                         THIS = xl;
-                        if (xl->cbks->forget)
+                        if (!xl->call_cleanup && xl->cbks->forget)
                                 xl->cbks->forget (xl, inode);
                         THIS = old_THIS;
                 }
diff --git a/libglusterfs/src/statedump.c b/libglusterfs/src/statedump.c
index 697ddc3..e9ecef5 100644
--- a/libglusterfs/src/statedump.c
+++ b/libglusterfs/src/statedump.c
@@ -483,21 +483,14 @@ gf_proc_dump_dict_info (glusterfs_ctx_t *ctx)
                             (total_pairs / total_dicts));
 }
 
-void
-gf_proc_dump_xlator_info (xlator_t *top)
+static void
+gf_proc_dump_per_xlator_info (xlator_t *top)
 {
-        xlator_t        *trav = NULL;
-        glusterfs_ctx_t *ctx = NULL;
+        xlator_t        *trav = top;
+        glusterfs_ctx_t *ctx = top->ctx;
         char             itable_key[1024] = {0,};
 
-        if (!top)
-                return;
-
-        ctx = top->ctx;
-
-        trav = top;
-        while (trav) {
-
+        while (trav && !trav->cleanup_starting) {
                 if (ctx->measure_latency)
                         gf_proc_dump_latency_info (trav);
 
@@ -521,7 +514,6 @@ gf_proc_dump_xlator_info (xlator_t *top)
                 if (GF_PROC_DUMP_IS_XL_OPTION_ENABLED (inode) &&
                     (trav->dumpops->inode))
                         trav->dumpops->inode (trav);
-
                 if (trav->dumpops->fd &&
                     GF_PROC_DUMP_IS_XL_OPTION_ENABLED (fd))
                         trav->dumpops->fd (trav);
@@ -532,6 +524,30 @@ gf_proc_dump_xlator_info (xlator_t *top)
 
                 trav = trav->next;
         }
+}
+
+
+
+void
+gf_proc_dump_xlator_info (xlator_t *top, gf_boolean_t brick_mux)
+{
+        xlator_t        *trav = NULL;
+        xlator_list_t   **trav_p        = NULL;
+
+        if (!top)
+                return;
+
+        trav = top;
+        gf_proc_dump_per_xlator_info (trav);
+
+        if (brick_mux) {
+                trav_p = &top->children;
+                while (*trav_p) {
+                        trav = (*trav_p)->xlator;
+                        gf_proc_dump_per_xlator_info (trav);
+                        trav_p = &(*trav_p)->next;
+                }
+        }
 
         return;
 }
@@ -785,12 +801,27 @@ gf_proc_dump_info (int signum, glusterfs_ctx_t *ctx)
         char               tmp_dump_name[PATH_MAX] = {0,};
         char               path[PATH_MAX]          = {0,};
         struct timeval     tv                      = {0,};
+        gf_boolean_t       is_brick_mux            = _gf_false;
+        xlator_t          *top                     = NULL;
+        xlator_list_t    **trav_p                 = NULL;
+        int                brick_count            = 0;
 
         gf_proc_dump_lock ();
 
         if (!ctx)
                 goto out;
 
+        if (ctx) {
+                top = ctx->active->first;
+                for (trav_p = &top->children; *trav_p;
+                                     trav_p = &(*trav_p)->next) {
+                        brick_count++;
+                }
+
+                if (brick_count > 1)
+                        is_brick_mux = _gf_true;
+        }
+
         if (ctx->cmd_args.brick_name) {
                 GF_REMOVE_SLASH_FROM_PATH (ctx->cmd_args.brick_name, brick_name);
         } else
@@ -850,12 +881,12 @@ gf_proc_dump_info (int signum, glusterfs_ctx_t *ctx)
 
         if (ctx->master) {
                 gf_proc_dump_add_section ("fuse");
-                gf_proc_dump_xlator_info (ctx->master);
+                gf_proc_dump_xlator_info (ctx->master, _gf_false);
         }
 
         if (ctx->active) {
                 gf_proc_dump_add_section ("active graph - %d", ctx->graph_id);
-                gf_proc_dump_xlator_info (ctx->active->top);
+                gf_proc_dump_xlator_info (ctx->active->top, is_brick_mux);
         }
 
         i = 0;
diff --git a/libglusterfs/src/xlator.c b/libglusterfs/src/xlator.c
index de97dff..4722800 100644
--- a/libglusterfs/src/xlator.c
+++ b/libglusterfs/src/xlator.c
@@ -424,7 +424,7 @@ get_xlator_by_name_or_type (xlator_t *this, char *target, int is_name)
 
         for (trav = this->children; trav; trav = trav->next) {
                 value = is_name ? trav->xlator->name : trav->xlator->type;
-                if (strcmp(value, target) == 0) {
+                if (!strcmp(value, target)) {
                         return trav->xlator;
                 }
                 child_xl = get_xlator_by_name_or_type (trav->xlator, target,
@@ -505,7 +505,9 @@ xlator_init (xlator_t *xl)
         }
 
         xl->init_succeeded = 1;
-
+        /*xl->cleanup_starting = 0;
+          xl->call_cleanup = 0;
+        */
         ret = 0;
 out:
         return ret;
@@ -638,7 +640,7 @@ xlator_list_destroy (xlator_list_t *list)
         return 0;
 }
 
-static int
+int
 xlator_memrec_free (xlator_t *xl)
 {
         uint32_t        i               = 0;
diff --git a/libglusterfs/src/xlator.h b/libglusterfs/src/xlator.h
index 26d2cc5..5ed8646 100644
--- a/libglusterfs/src/xlator.h
+++ b/libglusterfs/src/xlator.h
@@ -957,6 +957,14 @@ struct _xlator {
 
         /* Its used as an index to inode_ctx*/
         uint32_t            xl_id;
+
+        /* cleanup flag to avoid races during xlator cleanup */
+        uint32_t cleanup_starting;
+
+        /* flag to avoid recall of xlator_mem_cleanup for xame xlator */
+        uint32_t call_cleanup;
+
+
 };
 
 typedef struct {
@@ -1069,5 +1077,6 @@ copy_opts_to_child (xlator_t *src, xlator_t *dst, char *glob);
 int
 glusterfs_delete_volfile_checksum (glusterfs_ctx_t *ctx,
                                    const char *volfile_id);
-
+int
+xlator_memrec_free (xlator_t *xl);
 #endif /* _XLATOR_H */
diff --git a/tests/volume.rc b/tests/volume.rc
index d57aa93..f9e16c5 100644
--- a/tests/volume.rc
+++ b/tests/volume.rc
@@ -797,26 +797,53 @@ function count_sh_entries()
     ls $1/.glusterfs/indices/xattrop | grep -v "xattrop-" | wc -l
 }
 
+function check_brick_multiplex() {
+        cnt="$(ls /var/log/glusterfs/bricks|wc -l)"
+        local ret=$($CLI volume info|grep "cluster.brick-multiplex"|cut -d" " -f2)
+        local bcnt="$(brick_count)"
+
+        if [ $bcnt -ne 1 ]; then
+           if [ "$ret" = "on" ] || [ $cnt -eq 1 ]; then
+              echo "Y"
+           else
+              echo "N"
+           fi
+        else
+           echo "N"
+        fi
+}
+
 function get_fd_count {
         local vol=$1
         local host=$2
         local brick=$3
         local fname=$4
+        local val="$(check_brick_multiplex)"
         local gfid_str=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $brick/$fname))
         local statedump=$(generate_brick_statedump $vol $host $brick)
-        local count=$(grep "gfid=$gfid_str" $statedump -A2 -B1 | grep $brick -A3 | grep -w fd-count | cut -f2 -d'=' | tail -1)
+        if [ $val == "N" ]; then
+            count=$(grep "gfid=$gfid_str" $statedump -A2 | grep fd-count | cut -f2 -d'=' | tail -1)
+        else
+            count=$(grep "${brick}.active.1" -A3 $statedump | grep "gfid=$gfid_str" -A2 | grep fd-count | cut -f2 -d'=' | tail -1)
+        fi
         rm -f $statedump
         echo $count
 }
 
+
 function get_active_fd_count {
         local vol=$1
         local host=$2
         local brick=$3
         local fname=$4
+        local val="$(check_brick_multiplex)"
         local gfid_str=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $brick/$fname))
         local statedump=$(generate_brick_statedump $vol $host $brick)
-        local count=$(grep "gfid=$gfid_str" $statedump -A2 -B1 | grep $brick -A3 | grep -w active-fd-count | cut -f2 -d'=' | tail -1)
+        if [ $val == "N" ]; then
+            count=$(grep "gfid=$gfid_str" $statedump -A2 | grep fd-count | cut -f2 -d'=' | tail -1)
+        else
+            count=$(grep "${brick}.active.1" -A3 $statedump | grep "gfid=$gfid_str" -A2 | grep fd-count | cut -f2 -d'=' | tail -1)
+        fi
         rm -f $statedump
         echo $count
 }
diff --git a/xlators/features/changelog/src/changelog-rpc.c b/xlators/features/changelog/src/changelog-rpc.c
index 5524e43..ccb22b5 100644
--- a/xlators/features/changelog/src/changelog-rpc.c
+++ b/xlators/features/changelog/src/changelog-rpc.c
@@ -255,6 +255,15 @@ changelog_handle_probe (rpcsvc_request_t *req)
         changelog_probe_req   rpc_req = {0,};
         changelog_probe_rsp   rpc_rsp = {0,};
 
+
+        this = req->trans->xl;
+        if (this->cleanup_starting) {
+                gf_msg (this->name, GF_LOG_DEBUG, 0,
+                        CHANGELOG_MSG_HANDLE_PROBE_ERROR,
+                        "cleanup_starting flag is already set for xl");
+                return 0;
+        }
+
         ret = xdr_to_generic (req->msg[0],
                               &rpc_req, (xdrproc_t)xdr_changelog_probe_req);
         if (ret < 0) {
diff --git a/xlators/features/changelog/src/changelog.c b/xlators/features/changelog/src/changelog.c
index a472208..121ab04 100644
--- a/xlators/features/changelog/src/changelog.c
+++ b/xlators/features/changelog/src/changelog.c
@@ -2877,6 +2877,7 @@ void
 fini (xlator_t *this)
 {
         changelog_priv_t *priv = NULL;
+        struct list_head  queue = {0, };
 
         priv = this->private;
 
@@ -2884,6 +2885,10 @@ fini (xlator_t *this)
                 /* terminate RPC server/threads */
                 changelog_cleanup_rpc (this, priv);
 
+                /* call barrier_disable to cancel timer */
+                if (priv->barrier_enabled)
+                        __chlog_barrier_disable (this, &queue);
+
                 /* cleanup barrier related objects */
                 changelog_barrier_pthread_destroy (priv);
 
diff --git a/xlators/features/trash/src/trash.c b/xlators/features/trash/src/trash.c
index d114858..35db245 100644
--- a/xlators/features/trash/src/trash.c
+++ b/xlators/features/trash/src/trash.c
@@ -2591,16 +2591,24 @@ fini (xlator_t *this)
 
         GF_VALIDATE_OR_GOTO ("trash", this, out);
         priv = this->private;
-        inode_table = priv->trash_itable;
         if (priv) {
-                if (priv->newtrash_dir)
+                inode_table = priv->trash_itable;
+                if (priv->newtrash_dir) {
                         GF_FREE (priv->newtrash_dir);
-                if (priv->oldtrash_dir)
+                        priv->newtrash_dir = NULL;
+                }
+                if (priv->oldtrash_dir) {
                         GF_FREE (priv->oldtrash_dir);
-                if (priv->brick_path)
+                        priv->oldtrash_dir = NULL;
+                }
+                if (priv->brick_path) {
                         GF_FREE (priv->brick_path);
-                if (priv->eliminate)
+                        priv->brick_path = NULL;
+                }
+                if (priv->eliminate) {
                         wipe_eliminate_path (&priv->eliminate);
+                        priv->eliminate = NULL;
+                }
                 if (inode_table) {
                         inode_table_destroy (inode_table);
                         priv->trash_itable = NULL;
diff --git a/xlators/protocol/server/src/server-handshake.c b/xlators/protocol/server/src/server-handshake.c
index 50796d5..cbc93a3 100644
--- a/xlators/protocol/server/src/server-handshake.c
+++ b/xlators/protocol/server/src/server-handshake.c
@@ -475,6 +475,7 @@ server_setvolume (rpcsvc_request_t *req)
         glusterfs_ctx_t     *ctx           = NULL;
         struct  _child_status *tmp         = NULL;
         char                *subdir_mount  = NULL;
+        gf_boolean_t         cleanup_starting = _gf_false;
 
         params = dict_new ();
         reply  = dict_new ();
@@ -574,11 +575,13 @@ server_setvolume (rpcsvc_request_t *req)
                                       "initialised yet. Try again later");
                 goto fail;
         }
+
         list_for_each_entry (tmp, &conf->child_status->status_list,
                                                                   status_list) {
                 if (strcmp (tmp->name, name) == 0)
                         break;
         }
+
         if (!tmp->name) {
                 gf_msg (this->name, GF_LOG_ERROR, 0,
                         PS_MSG_CHILD_STATUS_FAILED,
@@ -592,6 +595,7 @@ server_setvolume (rpcsvc_request_t *req)
                                 "Failed to set 'child_up' for xlator %s "
                                 "in the reply dict", tmp->name);
         }
+
         ret = dict_get_str (params, "process-uuid", &client_uid);
         if (ret < 0) {
                 ret = dict_set_str (reply, "ERROR",
@@ -665,8 +669,27 @@ server_setvolume (rpcsvc_request_t *req)
                                                   INTERNAL_LOCKS | POSIX_LOCKS);
         }
 
-        if (req->trans->xl_private != client)
+        pthread_mutex_lock (&conf->mutex);
+        if (xl->cleanup_starting) {
+                cleanup_starting = _gf_true;
+        } else if (req->trans->xl_private != client) {
                 req->trans->xl_private = client;
+        }
+        pthread_mutex_unlock (&conf->mutex);
+
+        if (cleanup_starting) {
+                op_ret = -1;
+                op_errno = EAGAIN;
+
+                ret = dict_set_str (reply, "ERROR",
+                                    "cleanup flag is set for xlator. "
+                                    " Try again later");
+                if (ret < 0)
+                        gf_msg_debug (this->name, 0, "failed to set error: "
+                                      "cleanup flag is set for xlator. "
+                                      "Try again later");
+                goto fail;
+        }
 
         auth_set_username_passwd (params, config_params, client);
         if (req->trans->ssl_name) {
diff --git a/xlators/protocol/server/src/server.c b/xlators/protocol/server/src/server.c
index 737bb96..792dfb3 100644
--- a/xlators/protocol/server/src/server.c
+++ b/xlators/protocol/server/src/server.c
@@ -467,7 +467,16 @@ server_rpc_notify (rpcsvc_t *rpc, void *xl, rpcsvc_event_t event,
         server_ctx_t        *serv_ctx   = NULL;
         struct timespec     grace_ts    = {0, };
         char                *auth_path  = NULL;
-        int                 ret         = -1;
+        int                  ret        = -1;
+        gf_boolean_t         victim_found = _gf_false;
+        char                *xlator_name  = NULL;
+        glusterfs_ctx_t     *ctx          = NULL;
+        xlator_t            *top          = NULL;
+        xlator_list_t      **trav_p       = NULL;
+        xlator_t            *travxl       = NULL;
+        uint64_t             xprtrefcount = 0;
+        struct _child_status *tmp         = NULL;
+
 
         if (!xl || !data) {
                 gf_msg_callingfn ("server", GF_LOG_WARNING, 0,
@@ -479,6 +488,7 @@ server_rpc_notify (rpcsvc_t *rpc, void *xl, rpcsvc_event_t event,
         this = xl;
         trans = data;
         conf = this->private;
+        ctx = glusterfsd_ctx;
 
         switch (event) {
         case RPCSVC_EVENT_ACCEPT:
@@ -626,11 +636,49 @@ unref_transport:
                 client = trans->xl_private;
                 if (!client)
                         break;
+                pthread_mutex_lock (&conf->mutex);
+                list_for_each_entry (tmp, &conf->child_status->status_list,
+                                     status_list) {
+                        if (tmp->name && client->bound_xl &&
+                            client->bound_xl->cleanup_starting &&
+                            !strcmp (tmp->name, client->bound_xl->name)) {
+                                xprtrefcount = GF_ATOMIC_GET (tmp->xprtrefcnt);
+                                if (xprtrefcount > 0) {
+                                        xprtrefcount = GF_ATOMIC_DEC (tmp->xprtrefcnt);
+                                        if (xprtrefcount == 0)
+                                                xlator_name = gf_strdup(client->bound_xl->name);
+                                }
+                                break;
+                        }
+                }
+                pthread_mutex_unlock (&conf->mutex);
 
                 /* unref only for if (!client->lk_heal) */
                 if (!conf->lk_heal)
                         gf_client_unref (client);
 
+                if (xlator_name) {
+                        if (this->ctx->active) {
+                                top = this->ctx->active->first;
+                                LOCK (&ctx->volfile_lock);
+                                for (trav_p = &top->children; *trav_p;
+                                                   trav_p = &(*trav_p)->next) {
+                                        travxl = (*trav_p)->xlator;
+                                        if (!travxl->call_cleanup &&
+                                            strcmp (travxl->name, xlator_name) == 0) {
+                                                victim_found = _gf_true;
+                                                break;
+                                        }
+                                }
+                                UNLOCK (&ctx->volfile_lock);
+                                if (victim_found) {
+                                        xlator_mem_cleanup (travxl);
+                                        glusterfs_autoscale_threads (ctx, -1, this);
+                                }
+                        }
+                        GF_FREE (xlator_name);
+                }
+
                 trans->xl_private = NULL;
                 break;
         default:
@@ -1078,6 +1126,7 @@ init (xlator_t *this)
         conf->child_status = GF_CALLOC (1, sizeof (struct _child_status),
                                           gf_server_mt_child_status);
         INIT_LIST_HEAD (&conf->child_status->status_list);
+        GF_ATOMIC_INIT (conf->child_status->xprtrefcnt, 0);
 
         /*ret = dict_get_str (this->options, "statedump-path", &statedump_path);
         if (!ret) {
@@ -1392,14 +1441,53 @@ server_process_child_event (xlator_t *this, int32_t event, void *data,
         int              ret          = -1;
         server_conf_t    *conf        = NULL;
         rpc_transport_t  *xprt        = NULL;
+        xlator_t         *victim      = NULL;
+        struct  _child_status *tmp    = NULL;
 
         GF_VALIDATE_OR_GOTO(this->name, data, out);
 
         conf = this->private;
         GF_VALIDATE_OR_GOTO(this->name, conf, out);
 
+        victim = data;
         pthread_mutex_lock (&conf->mutex);
         {
+                if (cbk_procnum == GF_CBK_CHILD_UP) {
+                        list_for_each_entry (tmp, &conf->child_status->status_list,
+                                             status_list) {
+                                if (tmp->name == NULL)
+                                        break;
+                                if (strcmp (tmp->name, victim->name) == 0)
+                                        break;
+                        }
+                        if (tmp->name) {
+                                tmp->child_up = _gf_true;
+                        } else {
+                                tmp  = GF_CALLOC (1, sizeof (struct _child_status),
+                                                  gf_server_mt_child_status);
+                                INIT_LIST_HEAD (&tmp->status_list);
+                                tmp->name  = gf_strdup (victim->name);
+                                tmp->child_up = _gf_true;
+                                list_add_tail (&tmp->status_list,
+                                               &conf->child_status->status_list);
+                        }
+                }
+
+                if (cbk_procnum == GF_CBK_CHILD_DOWN) {
+                        list_for_each_entry (tmp, &conf->child_status->status_list,
+                                             status_list) {
+                                if (strcmp (tmp->name, victim->name) == 0) {
+                                        tmp->child_up = _gf_false;
+                                        break;
+                                }
+                        }
+
+                        if (!tmp->name)
+                                gf_msg (this->name, GF_LOG_ERROR, 0,
+                                        PS_MSG_CHILD_STATUS_FAILED,
+                                        "No xlator %s is found in "
+                                        "child status list", victim->name);
+                }
                 list_for_each_entry (xprt, &conf->xprt_list, list) {
                         if (!xprt->xl_private) {
                                 continue;
@@ -1433,6 +1521,8 @@ notify (xlator_t *this, int32_t event, void *data, ...)
         struct  _child_status *tmp    = NULL;
         gf_boolean_t     victim_found = _gf_false;
         glusterfs_ctx_t  *ctx         = NULL;
+        gf_boolean_t     xprt_found   = _gf_false;
+        uint64_t         totxprt      = 0;
 
         GF_VALIDATE_OR_GOTO (THIS->name, this, out);
         conf = this->private;
@@ -1467,24 +1557,6 @@ notify (xlator_t *this, int32_t event, void *data, ...)
 
         case GF_EVENT_CHILD_UP:
         {
-                list_for_each_entry (tmp, &conf->child_status->status_list,
-                                                                 status_list) {
-                        if (tmp->name == NULL)
-                                break;
-                        if (strcmp (tmp->name, victim->name) == 0)
-                                break;
-                }
-                if (tmp->name) {
-                        tmp->child_up = _gf_true;
-                } else {
-                        tmp  = GF_CALLOC (1, sizeof (struct _child_status),
-                                          gf_server_mt_child_status);
-                        INIT_LIST_HEAD (&tmp->status_list);
-                        tmp->name  = gf_strdup (victim->name);
-                        tmp->child_up = _gf_true;
-                        list_add_tail (&tmp->status_list,
-                                              &conf->child_status->status_list);
-                }
                 ret = server_process_child_event (this, event, data,
                                                   GF_CBK_CHILD_UP);
                 if (ret) {
@@ -1499,19 +1571,6 @@ notify (xlator_t *this, int32_t event, void *data, ...)
 
         case GF_EVENT_CHILD_DOWN:
         {
-                list_for_each_entry (tmp, &conf->child_status->status_list,
-                                                                  status_list) {
-                        if (strcmp (tmp->name, victim->name) == 0) {
-                                tmp->child_up = _gf_false;
-                                break;
-                        }
-                }
-                if (!tmp->name)
-                        gf_msg (this->name, GF_LOG_ERROR, 0,
-                                PS_MSG_CHILD_STATUS_FAILED,
-                                "No xlator %s is found in "
-                                "child status list", victim->name);
-
                 ret = server_process_child_event (this, event, data,
                                                   GF_CBK_CHILD_DOWN);
                 if (ret) {
@@ -1528,6 +1587,28 @@ notify (xlator_t *this, int32_t event, void *data, ...)
         case GF_EVENT_CLEANUP:
                 conf = this->private;
                 pthread_mutex_lock (&conf->mutex);
+                /* Calculate total no. of xprt available in list for this
+                   brick xlator
+                 */
+                list_for_each_entry_safe (xprt, xp_next,
+                                          &conf->xprt_list, list) {
+                        if (!xprt->xl_private) {
+                                continue;
+                        }
+                        if (xprt->xl_private->bound_xl == data) {
+                                totxprt++;
+                        }
+                }
+
+                list_for_each_entry (tmp, &conf->child_status->status_list,
+                                     status_list) {
+                        if (strcmp (tmp->name, victim->name) == 0) {
+                                tmp->child_up = _gf_false;
+                                GF_ATOMIC_INIT (tmp->xprtrefcnt, totxprt);
+                                break;
+                        }
+                }
+
                 /*
                  * Disconnecting will (usually) drop the last ref, which will
                  * cause the transport to be unlinked and freed while we're
@@ -1543,18 +1624,11 @@ notify (xlator_t *this, int32_t event, void *data, ...)
                                 gf_log (this->name, GF_LOG_INFO,
                                         "disconnecting %s",
                                         xprt->peerinfo.identifier);
+                                xprt_found = _gf_true;
                                 rpc_transport_disconnect (xprt, _gf_false);
                         }
                 }
-                list_for_each_entry (tmp, &conf->child_status->status_list,
-                                                                 status_list) {
-                        if (strcmp (tmp->name, victim->name) == 0)
-                                break;
-                }
-                if (tmp->name && (strcmp (tmp->name, victim->name) == 0)) {
-                        GF_FREE (tmp->name);
-                        list_del (&tmp->status_list);
-                }
+
                 pthread_mutex_unlock (&conf->mutex);
                 if (this->ctx->active) {
                         top = this->ctx->active->first;
@@ -1562,8 +1636,8 @@ notify (xlator_t *this, int32_t event, void *data, ...)
                                 for (trav_p = &top->children; *trav_p;
                                                    trav_p = &(*trav_p)->next) {
                                         travxl = (*trav_p)->xlator;
-                                        if (travxl &&
-                                                   strcmp (travxl->name, victim->name) == 0) {
+                                        if (!travxl->call_cleanup &&
+                                            strcmp (travxl->name, victim->name) == 0) {
                                                 victim_found = _gf_true;
                                                 break;
                                         }
@@ -1572,12 +1646,14 @@ notify (xlator_t *this, int32_t event, void *data, ...)
                                         glusterfs_delete_volfile_checksum (ctx,
                                                  victim->volfile_id);
                         UNLOCK (&ctx->volfile_lock);
-                        if (victim_found)
-                                (*trav_p) = (*trav_p)->next;
+
                         glusterfs_mgmt_pmap_signout (ctx,
                                                      victim->name);
-                        /* we need the protocol/server xlator here as 'this' */
-                        glusterfs_autoscale_threads (ctx, -1, this);
+
+                        if (!xprt_found && victim_found) {
+                                xlator_mem_cleanup (victim);
+                                glusterfs_autoscale_threads (ctx, -1, this);
+                        }
                 }
                 break;
 
diff --git a/xlators/protocol/server/src/server.h b/xlators/protocol/server/src/server.h
index ea15b56..af987aa 100644
--- a/xlators/protocol/server/src/server.h
+++ b/xlators/protocol/server/src/server.h
@@ -77,7 +77,7 @@ struct _child_status {
         struct list_head status_list;
         char *name;
         gf_boolean_t child_up;
-
+        gf_atomic_t  xprtrefcnt;
 };
 struct server_conf {
         rpcsvc_t               *rpc;
diff --git a/xlators/storage/posix/src/posix-helpers.c b/xlators/storage/posix/src/posix-helpers.c
index 334175d..49f9f46 100644
--- a/xlators/storage/posix/src/posix-helpers.c
+++ b/xlators/storage/posix/src/posix-helpers.c
@@ -1921,6 +1921,12 @@ out:
         return NULL;
 
 abort:
+        LOCK (&priv->lock);
+        {
+                priv->health_check_active = _gf_false;
+        }
+        UNLOCK (&priv->lock);
+
         /* health-check failed */
         gf_msg (this->name, GF_LOG_EMERG, 0, P_MSG_HEALTHCHECK_FAILED,
                 "health-check failed, going down");
@@ -1961,18 +1967,18 @@ abort:
                         for (trav_p = &top->children; *trav_p;
                              trav_p = &(*trav_p)->next) {
                                 victim = (*trav_p)->xlator;
-                                if (victim &&
-                                         strcmp (victim->name, priv->base_path) == 0) {
+                                if (!victim->call_cleanup &&
+                                    strcmp (victim->name, priv->base_path) == 0) {
                                         victim_found = _gf_true;
                                         break;
                                 }
                         }
                 UNLOCK (&ctx->volfile_lock);
-                if (victim_found) {
+                if (victim_found && !victim->cleanup_starting) {
                         gf_log (THIS->name, GF_LOG_INFO, "detaching not-only "
                                 " child %s", priv->base_path);
+                        victim->cleanup_starting = 1;
                         top->notify (top, GF_EVENT_CLEANUP, victim);
-                        xlator_mem_cleanup (victim);
                 }
         }
 
diff --git a/xlators/storage/posix/src/posix.c b/xlators/storage/posix/src/posix.c
index 74ee98f..c680e3f 100644
--- a/xlators/storage/posix/src/posix.c
+++ b/xlators/storage/posix/src/posix.c
@@ -7899,12 +7899,13 @@ fini (xlator_t *this)
         struct posix_private *priv = this->private;
         if (!priv)
                 return;
-        this->private = NULL;
-        if (priv->health_check) {
+        LOCK (&priv->lock);
+        if (priv->health_check_active) {
                 priv->health_check_active = _gf_false;
                 pthread_cancel (priv->health_check);
                 priv->health_check = 0;
         }
+        UNLOCK (&priv->lock);
         if (priv->disk_space_check) {
                 priv->disk_space_check_active = _gf_false;
                 pthread_cancel (priv->disk_space_check);
@@ -7929,6 +7930,7 @@ fini (xlator_t *this)
         GF_FREE (priv->hostname);
         GF_FREE (priv->trash_path);
         GF_FREE (priv);
+        this->private = NULL;
 
         return;
 }
-- 
1.8.3.1