From f58abec63fb325e0e1c21fe3fe127de2e4a85d7d Mon Sep 17 00:00:00 2001
From: Mohit Agrawal <moagrawa@redhat.com>
Date: Thu, 24 Jan 2019 18:45:54 +0530
Subject: [PATCH 509/510] core: heketi-cli is throwing error "target is busy"

Problem: While deleting a block hosting volume through heketi-cli,
         the operation fails with the error "target is busy".
         The cli throws this error because the brick is not detached
         successfully, and the detach fails due to a race condition
         while cleaning up the xprt associated with the detached brick.

Solution: To avoid the xprt-specific race condition, introduce an atomic
          flag on rpc_transport.
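
The sketch below is illustrative only and is not part of the patch: it
models the "disconnect in progress" flag with C11 atomics in place of
the GF_ATOMIC_* macros used in the GlusterFS tree, and the struct and
function names are hypothetical.

    /* Hypothetical sketch: same idea as the patch, built on C11 atomics. */
    #include <stdatomic.h>
    #include <stdio.h>

    struct transport {
        atomic_int disconnect_progress; /* 0 = live, 1 = disconnect under way */
        void      *bound_xl;            /* brick this transport serves */
    };

    /* On transport creation the flag starts clear (cf. rpc_transport_load). */
    static void transport_init(struct transport *t, void *bound_xl)
    {
        atomic_init(&t->disconnect_progress, 0);
        t->bound_xl = bound_xl;
    }

    /* On a DISCONNECT event the flag is raised so the detach path skips
     * this transport (cf. server_rpc_notify). */
    static void transport_on_disconnect(struct transport *t)
    {
        atomic_store(&t->disconnect_progress, 1);
    }

    /* During brick detach, count only transports that are not already
     * disconnecting (cf. the totxprt accounting in server.c notify()). */
    static int count_active_xprts(struct transport **xprts, int n, void *brick)
    {
        int totxprt = 0;
        for (int i = 0; i < n; i++) {
            if (atomic_load(&xprts[i]->disconnect_progress))
                continue; /* already being cleaned up by the disconnect path */
            if (xprts[i]->bound_xl == brick)
                totxprt++;
        }
        return totxprt;
    }

    int main(void)
    {
        struct transport a, b;
        void *brick = (void *)0x1;
        transport_init(&a, brick);
        transport_init(&b, brick);
        transport_on_disconnect(&b); /* b already has a disconnect in flight */
        printf("transports still to disconnect: %d\n",
               count_active_xprts((struct transport *[]){&a, &b}, 2, brick));
        return 0;
    }

Transports already flagged are neither counted nor disconnected again;
the final GF_ATOMIC_SUB on victim->xprtrefcnt in the patch then
reconciles totxprt with the number of disconnects actually issued.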

> Change-Id: Id4ff1fe8375a63be71fb3343f455190a1b8bb6d4
> fixes: bz#1668190
> (Cherry picked from commit 04f84756e1baa5eff4560339700f82970eaa5d80)
> (Reviewed on upstream link https://review.gluster.org/#/c/glusterfs/+/22073/)

Change-Id: Ie3786b569ee03569bc3ac970925732dd834a76dc
BUG: 1669020
Signed-off-by: Mohit Agrawal <moagrawa@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/161388
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
---
 rpc/rpc-lib/src/rpc-transport.c      |  1 +
 rpc/rpc-lib/src/rpc-transport.h      |  1 +
 xlators/protocol/server/src/server.c | 18 ++++++++++++++++++
 3 files changed, 20 insertions(+)

diff --git a/rpc/rpc-lib/src/rpc-transport.c b/rpc/rpc-lib/src/rpc-transport.c
index 77abf9617..0c6ab6694 100644
--- a/rpc/rpc-lib/src/rpc-transport.c
+++ b/rpc/rpc-lib/src/rpc-transport.c
@@ -371,6 +371,7 @@ rpc_transport_load (glusterfs_ctx_t *ctx, dict_t *options, char *trans_name)
 	}
 
         INIT_LIST_HEAD (&trans->list);
+        GF_ATOMIC_INIT(trans->disconnect_progress, 0);
 
         return_trans = trans;
 
diff --git a/rpc/rpc-lib/src/rpc-transport.h b/rpc/rpc-lib/src/rpc-transport.h
index 23246c564..f5fb6e13b 100644
--- a/rpc/rpc-lib/src/rpc-transport.h
+++ b/rpc/rpc-lib/src/rpc-transport.h
@@ -217,6 +217,7 @@ struct rpc_transport {
          * layer or in client management notification handler functions
          */
         gf_boolean_t               connect_failed;
+        gf_atomic_t                disconnect_progress;
 };
 
 struct rpc_transport_ops {
diff --git a/xlators/protocol/server/src/server.c b/xlators/protocol/server/src/server.c
index 104615265..ba3b8316d 100644
--- a/xlators/protocol/server/src/server.c
+++ b/xlators/protocol/server/src/server.c
@@ -553,6 +553,11 @@ server_rpc_notify (rpcsvc_t *rpc, void *xl, rpcsvc_event_t event,
                         break;
                 }
 
+                /* Set the disconnect_progress flag to 1 to avoid races
+                   during brick detach while brick mux is enabled
+                */
+                GF_ATOMIC_INIT(trans->disconnect_progress, 1);
+
                 /* transport has to be removed from the list upon disconnect
                  * irrespective of whether lock self heal is off or on, since
                  * new transport will be created upon reconnect.
@@ -1638,6 +1643,7 @@ notify (xlator_t *this, int32_t event, void *data, ...)
         glusterfs_ctx_t  *ctx         = NULL;
         gf_boolean_t     xprt_found   = _gf_false;
         uint64_t         totxprt      = 0;
+        uint64_t         totdisconnect = 0;
 
         GF_VALIDATE_OR_GOTO (THIS->name, this, out);
         conf = this->private;
@@ -1715,6 +1721,10 @@ notify (xlator_t *this, int32_t event, void *data, ...)
                         if (!xprt->xl_private) {
                                 continue;
                         }
+
+                        if (GF_ATOMIC_GET(xprt->disconnect_progress))
+                                continue;
+
                         if (xprt->xl_private->bound_xl == data) {
                                 totxprt++;
                         }
@@ -1740,15 +1750,23 @@ notify (xlator_t *this, int32_t event, void *data, ...)
                         if (!xprt->xl_private) {
                                 continue;
                         }
+
+                        if (GF_ATOMIC_GET(xprt->disconnect_progress))
+                                continue;
+
                         if (xprt->xl_private->bound_xl == data) {
                                 gf_log (this->name, GF_LOG_INFO,
                                         "disconnecting %s",
                                         xprt->peerinfo.identifier);
                                 xprt_found = _gf_true;
+                                totdisconnect++;
                                 rpc_transport_disconnect (xprt, _gf_false);
                         }
                 }
 
+                if (totxprt > totdisconnect)
+                        GF_ATOMIC_SUB(victim->xprtrefcnt, (totxprt - totdisconnect));
+
                 pthread_mutex_unlock (&conf->mutex);
                 if (this->ctx->active) {
                         top = this->ctx->active->first;
-- 
2.20.1