d1681e
From 385b61f9a6f818c2810cc0a2223c9d71340cd345 Mon Sep 17 00:00:00 2001
d1681e
From: Atin Mukherjee <amukherj@redhat.com>
d1681e
Date: Tue, 17 Oct 2017 21:32:44 +0530
d1681e
Subject: [PATCH 43/74] glusterd: clean up portmap on brick disconnect
d1681e
d1681e
GlusterD's portmap entry for a brick is cleaned up when a PMAP_SIGNOUT event is
d1681e
initiated by the brick process at the shutdown. But if the brick process crashes
d1681e
or gets killed through SIGKILL then this event is not initiated and glusterd
d1681e
ends up with a stale port. Since GlusterD's portmap traversal happens both ways,
d1681e
forward for allocation and backward for registry search, there is a possibility
d1681e
that glusterd might end up running with a stale port for a brick which
d1681e
eventually will end up with clients to fail to connect to the bricks.
d1681e
d1681e
Solution is to clean up the port entry in case the process is down as
d1681e
part of the brick disconnect event. Although with this the handling
d1681e
PMAP_SIGNOUT event becomes redundant in most of the cases, but this is
d1681e
the safeguard method to avoid glusterd getting into the stale port
d1681e
issues.
d1681e
d1681e
>mainline patch : https://review.gluster.org/#/c/18541
d1681e
d1681e
Change-Id: I04c5be6d11e772ee4de16caf56dbb37d5c944303
d1681e
BUG: 1503244
d1681e
Signed-off-by: Atin Mukherjee <amukherj@redhat.com>
d1681e
Reviewed-on: https://code.engineering.redhat.com/gerrit/123057
d1681e
Reviewed-by: Gaurav Yadav <gyadav@redhat.com>
d1681e
---
d1681e
 xlators/mgmt/glusterd/src/glusterd-handler.c | 25 +++++++++++++++++++++++++
d1681e
 xlators/mgmt/glusterd/src/glusterd-pmap.c    | 26 +++++++++++++++++---------
d1681e
 xlators/mgmt/glusterd/src/glusterd-pmap.h    |  3 ++-
d1681e
 xlators/mgmt/glusterd/src/glusterd.c         |  3 ++-
d1681e
 4 files changed, 46 insertions(+), 11 deletions(-)
d1681e
d1681e
diff --git a/xlators/mgmt/glusterd/src/glusterd-handler.c b/xlators/mgmt/glusterd/src/glusterd-handler.c
d1681e
index af9a796..34e751c 100644
d1681e
--- a/xlators/mgmt/glusterd/src/glusterd-handler.c
d1681e
+++ b/xlators/mgmt/glusterd/src/glusterd-handler.c
d1681e
@@ -5974,8 +5974,10 @@ __glusterd_brick_rpc_notify (struct rpc_clnt *rpc, void *mydata,
d1681e
         glusterd_volinfo_t      *volinfo           = NULL;
d1681e
         xlator_t                *this              = NULL;
d1681e
         int                      temp              = 0;
d1681e
+        int32_t                  pid               = -1;
d1681e
         glusterd_brickinfo_t    *brickinfo_tmp     = NULL;
d1681e
         glusterd_brick_proc_t   *brick_proc        = NULL;
d1681e
+        char                     pidfile[PATH_MAX] = {0};
d1681e
 
d1681e
         brickid = mydata;
d1681e
         if (!brickid)
d1681e
@@ -6074,6 +6076,29 @@ __glusterd_brick_rpc_notify (struct rpc_clnt *rpc, void *mydata,
d1681e
                                   "peer=%s;volume=%s;brick=%s",
d1681e
                                   brickinfo->hostname, volinfo->volname,
d1681e
                                   brickinfo->path);
d1681e
+                        /* In case of an abrupt shutdown of a brick PMAP_SIGNOUT
d1681e
+                         * event is not received by glusterd which can lead to a
d1681e
+                         * stale port entry in glusterd, so forcibly clean up
d1681e
+                         * the same if the process is not running
d1681e
+                         */
d1681e
+                        GLUSTERD_GET_BRICK_PIDFILE (pidfile, volinfo,
d1681e
+                                                    brickinfo, conf);
d1681e
+                        if (!gf_is_service_running (pidfile, &pid)) {
d1681e
+                                ret = pmap_registry_remove (
d1681e
+                                                      THIS, brickinfo->port,
d1681e
+                                                      brickinfo->path,
d1681e
+                                                      GF_PMAP_PORT_BRICKSERVER,
d1681e
+                                                      NULL, _gf_true);
d1681e
+                                if (ret) {
d1681e
+                                        gf_msg (this->name, GF_LOG_WARNING,
d1681e
+                                                GD_MSG_PMAP_REGISTRY_REMOVE_FAIL,
d1681e
+                                                0, "Failed to remove pmap "
d1681e
+                                                "registry for port %d for "
d1681e
+                                                "brick %s", brickinfo->port,
d1681e
+                                                brickinfo->path);
d1681e
+                                        ret = 0;
d1681e
+                                }
d1681e
+                        }
d1681e
                 }
d1681e
 
d1681e
                 if (is_brick_mx_enabled()) {
d1681e
diff --git a/xlators/mgmt/glusterd/src/glusterd-pmap.c b/xlators/mgmt/glusterd/src/glusterd-pmap.c
d1681e
index 2a75476..1b547e7 100644
d1681e
--- a/xlators/mgmt/glusterd/src/glusterd-pmap.c
d1681e
+++ b/xlators/mgmt/glusterd/src/glusterd-pmap.c
d1681e
@@ -239,7 +239,8 @@ pmap_assign_port (xlator_t *this, int old_port, const char *path)
d1681e
 
d1681e
         if (old_port) {
d1681e
                 ret = pmap_registry_remove (this, 0, path,
d1681e
-                                            GF_PMAP_PORT_BRICKSERVER, NULL);
d1681e
+                                            GF_PMAP_PORT_BRICKSERVER, NULL,
d1681e
+                                            _gf_false);
d1681e
                 if (ret) {
d1681e
                         gf_msg (this->name, GF_LOG_WARNING,
d1681e
                                 GD_MSG_PMAP_REGISTRY_REMOVE_FAIL, 0, "Failed to"
d1681e
@@ -342,7 +343,8 @@ pmap_registry_extend (xlator_t *this, int port, const char *brickname)
d1681e
 
d1681e
 int
d1681e
 pmap_registry_remove (xlator_t *this, int port, const char *brickname,
d1681e
-                      gf_pmap_port_type_t type, void *xprt)
d1681e
+                      gf_pmap_port_type_t type, void *xprt,
d1681e
+                      gf_boolean_t brick_disconnect)
d1681e
 {
d1681e
         struct pmap_registry *pmap = NULL;
d1681e
         int                   p = 0;
d1681e
@@ -389,11 +391,16 @@ remove:
d1681e
          * can delete the entire entry.
d1681e
          */
d1681e
         if (!pmap->ports[p].xprt) {
d1681e
-                brick_str = pmap->ports[p].brickname;
d1681e
-                if (brick_str) {
d1681e
-                        while (*brick_str != '\0') {
d1681e
-                                if (*(brick_str++) != ' ') {
d1681e
-                                        goto out;
d1681e
+                /* If the signout call is being triggered by brick disconnect
d1681e
+                 * then clean up all the bricks (in case of brick mux)
d1681e
+                 */
d1681e
+                if (!brick_disconnect) {
d1681e
+                        brick_str = pmap->ports[p].brickname;
d1681e
+                        if (brick_str) {
d1681e
+                                while (*brick_str != '\0') {
d1681e
+                                        if (*(brick_str++) != ' ') {
d1681e
+                                                goto out;
d1681e
+                                        }
d1681e
                                 }
d1681e
                         }
d1681e
                 }
d1681e
@@ -548,14 +555,15 @@ __gluster_pmap_signout (rpcsvc_request_t *req)
d1681e
                 goto fail;
d1681e
         }
d1681e
         rsp.op_ret = pmap_registry_remove (THIS, args.port, args.brick,
d1681e
-                                           GF_PMAP_PORT_BRICKSERVER, req->trans);
d1681e
+                                           GF_PMAP_PORT_BRICKSERVER, req->trans,
d1681e
+                                           _gf_false);
d1681e
 
d1681e
         ret = glusterd_get_brickinfo (THIS, args.brick, args.port, &brickinfo);
d1681e
         if (args.rdma_port) {
d1681e
                 snprintf(brick_path, PATH_MAX, "%s.rdma", args.brick);
d1681e
                 rsp.op_ret = pmap_registry_remove (THIS, args.rdma_port,
d1681e
                                 brick_path, GF_PMAP_PORT_BRICKSERVER,
d1681e
-                                req->trans);
d1681e
+                                req->trans, _gf_false);
d1681e
         }
d1681e
         /* Update portmap status on brickinfo */
d1681e
         if (brickinfo)
d1681e
diff --git a/xlators/mgmt/glusterd/src/glusterd-pmap.h b/xlators/mgmt/glusterd/src/glusterd-pmap.h
d1681e
index 9965a95..253b4cc 100644
d1681e
--- a/xlators/mgmt/glusterd/src/glusterd-pmap.h
d1681e
+++ b/xlators/mgmt/glusterd/src/glusterd-pmap.h
d1681e
@@ -42,7 +42,8 @@ int pmap_registry_bind (xlator_t *this, int port, const char *brickname,
d1681e
                         gf_pmap_port_type_t type, void *xprt);
d1681e
 int pmap_registry_extend (xlator_t *this, int port, const char *brickname);
d1681e
 int pmap_registry_remove (xlator_t *this, int port, const char *brickname,
d1681e
-                          gf_pmap_port_type_t type, void *xprt);
d1681e
+                          gf_pmap_port_type_t type, void *xprt,
d1681e
+                          gf_boolean_t brick_disconnect);
d1681e
 int pmap_registry_search (xlator_t *this, const char *brickname,
d1681e
                           gf_pmap_port_type_t type, gf_boolean_t destroy);
d1681e
 struct pmap_registry *pmap_registry_get (xlator_t *this);
d1681e
diff --git a/xlators/mgmt/glusterd/src/glusterd.c b/xlators/mgmt/glusterd/src/glusterd.c
d1681e
index 4887ff4..81a3206 100644
d1681e
--- a/xlators/mgmt/glusterd/src/glusterd.c
d1681e
+++ b/xlators/mgmt/glusterd/src/glusterd.c
d1681e
@@ -424,7 +424,8 @@ glusterd_rpcsvc_notify (rpcsvc_t *rpc, void *xl, rpcsvc_event_t event,
d1681e
                 pthread_mutex_lock (&priv->xprt_lock);
d1681e
                 list_del (&xprt->list);
d1681e
                 pthread_mutex_unlock (&priv->xprt_lock);
d1681e
-                pmap_registry_remove (this, 0, NULL, GF_PMAP_PORT_ANY, xprt);
d1681e
+                pmap_registry_remove (this, 0, NULL, GF_PMAP_PORT_ANY, xprt,
d1681e
+                                      _gf_false);
d1681e
                 break;
d1681e
         }
d1681e
 
d1681e
-- 
d1681e
1.8.3.1
d1681e