3604df
From 7492e5c60998786b84edb1bb3c21bba503652129 Mon Sep 17 00:00:00 2001
3604df
From: Atin Mukherjee <amukherj@redhat.com>
3604df
Date: Mon, 25 Jul 2016 19:09:08 +0530
3604df
Subject: [PATCH 23/86] glusterd: clean up old port and allocate new one on every restart
3604df
3604df
Backport of http://review.gluster.org/15005
3604df
3604df
GlusterD as of now was blindly assuming that the brick port which was already
3604df
allocated would be available to be reused and that assumption is absolutely
3604df
wrong.
3604df
3604df
Solution : On first attempt, we thought GlusterD should check if the already
3604df
allocated brick ports are free, if not allocate new port and pass it to the
3604df
daemon. But with that approach there is a possibility that if PMAP_SIGNOUT is
3604df
missed out, the stale port will be given back to the clients where connection
3604df
will keep on failing. Now given the port allocation always start from base_port,
3604df
if everytime a new port has to be allocated for the daemons, the port range will
3604df
still be under control. So this fix tries to clean up old port using
3604df
pmap_registry_remove () if any and then goes for pmap_registry_alloc ()
3604df
3604df
This BZ fixes #1263090 as well
3604df
3604df
>Reviewed-on: http://review.gluster.org/15005
3604df
>Smoke: Gluster Build System <jenkins@build.gluster.org>
3604df
>NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
3604df
>CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
3604df
>Reviewed-by: Avra Sengupta <asengupt@redhat.com>
3604df
3604df
Change-Id: If54a055d01ab0cbc06589dc1191d8fc52eb2c84f
3604df
BUG: 1356058
3604df
Signed-off-by: Atin Mukherjee <amukherj@redhat.com>
3604df
Reviewed-on: https://code.engineering.redhat.com/gerrit/84635
3604df
---
3604df
 .../glusterd/1313628-import-brick-ports-always.t   |   47 --------------------
3604df
 tests/features/ssl-ciphers.t                       |    5 ++
3604df
 xlators/mgmt/glusterd/src/glusterd-messages.h      |   10 ++++-
3604df
 xlators/mgmt/glusterd/src/glusterd-pmap.c          |   24 ++++++++++-
3604df
 xlators/mgmt/glusterd/src/glusterd-pmap.h          |    2 +
3604df
 xlators/mgmt/glusterd/src/glusterd-snapd-svc.c     |   23 +---------
3604df
 xlators/mgmt/glusterd/src/glusterd-utils.c         |   12 +++---
3604df
 7 files changed, 46 insertions(+), 77 deletions(-)
3604df
 delete mode 100755 tests/bugs/glusterd/1313628-import-brick-ports-always.t
3604df
3604df
diff --git a/tests/bugs/glusterd/1313628-import-brick-ports-always.t b/tests/bugs/glusterd/1313628-import-brick-ports-always.t
3604df
deleted file mode 100755
3604df
index d04c429..0000000
3604df
--- a/tests/bugs/glusterd/1313628-import-brick-ports-always.t
3604df
+++ /dev/null
3604df
@@ -1,47 +0,0 @@
3604df
-#!/bin/bash
3604df
-. $(dirname $0)/../../include.rc
3604df
-. $(dirname $0)/../../cluster.rc
3604df
-
3604df
-## Check that brick ports are always copied on import
3604df
-## --------------------------------------------------
3604df
-## This test checks that the brick ports are copied on import by checking that
3604df
-## they don't change when the following happens,
3604df
-##  - Stop a volume
3604df
-##  - Stop glusterd
3604df
-##  - Start the stopped volume
3604df
-##  - Start the stopped glusterd
3604df
-
3604df
-function get_brick_port() {
3604df
-  local VOL=$1
3604df
-  local BRICK=$2
3604df
-  $CLI2 volume status $VOL $BRICK --xml | sed -ne 's/.*<port>\([0-9]*\)<\/port>/\1/p'
3604df
-}
3604df
-
3604df
-
3604df
-cleanup
3604df
-
3604df
-TEST launch_cluster 2
3604df
-TEST $CLI1 peer probe $H2
3604df
-EXPECT_WITHIN $PROBE_TIMEOUT 1 peer_count
3604df
-
3604df
-# Create and start volume so that brick port assignment happens
3604df
-TEST $CLI1 volume create $V0 $H1:$B1/$V0 $H2:$B2/$V0
3604df
-TEST $CLI1 volume start $V0
3604df
-
3604df
-# Save port for 2nd brick
3604df
-BPORT_ORIG=$(get_brick_port $V0 $H2:$B2/$V0)
3604df
-
3604df
-# Stop volume, stop 2nd glusterd, start volume, start 2nd glusterd
3604df
-TEST $CLI1 volume stop $V0
3604df
-TEST kill_glusterd 2
3604df
-
3604df
-TEST $CLI1 volume start $V0
3604df
-TEST start_glusterd 2
3604df
-EXPECT_WITHIN $PROBE_TIMEOUT 1 peer_count
3604df
-
3604df
-# Get new port and compare with old one
3604df
-EXPECT_WITHIN $PROCESS_UP_TIMEOUT $BPORT_ORIG get_brick_port $V0 $H2:$B2/$V0
3604df
-
3604df
-$CLI1 volume stop $V0
3604df
-
3604df
-cleanup
3604df
diff --git a/tests/features/ssl-ciphers.t b/tests/features/ssl-ciphers.t
3604df
index 9ee7fc6..f5909f3 100644
3604df
--- a/tests/features/ssl-ciphers.t
3604df
+++ b/tests/features/ssl-ciphers.t
3604df
@@ -137,6 +137,7 @@ EXPECT "`pwd`/`dirname $0`/dh1024.pem" volume_option $V0 ssl.dh-param
3604df
 TEST $CLI volume stop $V0
3604df
 TEST $CLI volume start $V0
3604df
 EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" online_brick_count
3604df
+BRICK_PORT=`brick_port $V0`
3604df
 EXPECT "Y" openssl_connect -cipher EDH -connect $H0:$BRICK_PORT
3604df
 
3604df
 # Test the cipher-list option
3604df
@@ -145,6 +146,7 @@ EXPECT AES256-SHA volume_option $V0 ssl.cipher-list
3604df
 TEST $CLI volume stop $V0
3604df
 TEST $CLI volume start $V0
3604df
 EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" online_brick_count
3604df
+BRICK_PORT=`brick_port $V0`
3604df
 EXPECT "Y" openssl_connect -cipher AES256-SHA -connect $H0:$BRICK_PORT
3604df
 EXPECT "N" openssl_connect -cipher AES128-SHA -connect $H0:$BRICK_PORT
3604df
 
3604df
@@ -154,6 +156,7 @@ EXPECT EECDH:EDH:!TLSv1 volume_option $V0 ssl.cipher-list
3604df
 TEST $CLI volume stop $V0
3604df
 TEST $CLI volume start $V0
3604df
 EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" online_brick_count
3604df
+BRICK_PORT=`brick_port $V0`
3604df
 EXPECT "N" openssl_connect -cipher AES256-SHA -connect $H0:$BRICK_PORT
3604df
 EXPECT "Y" openssl_connect -cipher EECDH -connect $H0:$BRICK_PORT
3604df
 
3604df
@@ -162,6 +165,7 @@ EXPECT invalid volume_option $V0 ssl.ec-curve
3604df
 TEST $CLI volume stop $V0
3604df
 TEST $CLI volume start $V0
3604df
 EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" online_brick_count
3604df
+BRICK_PORT=`brick_port $V0`
3604df
 EXPECT "N" openssl_connect -cipher EECDH -connect $H0:$BRICK_PORT
3604df
 
3604df
 TEST $CLI volume set $V0 ssl.ec-curve secp521r1
3604df
@@ -169,6 +173,7 @@ EXPECT secp521r1 volume_option $V0 ssl.ec-curve
3604df
 TEST $CLI volume stop $V0
3604df
 TEST $CLI volume start $V0
3604df
 EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" online_brick_count
3604df
+BRICK_PORT=`brick_port $V0`
3604df
 EXPECT "Y" openssl_connect -cipher EECDH -connect $H0:$BRICK_PORT
3604df
 
3604df
 # test revocation
3604df
diff --git a/xlators/mgmt/glusterd/src/glusterd-messages.h b/xlators/mgmt/glusterd/src/glusterd-messages.h
3604df
index ba40b8f..623f4dc 100644
3604df
--- a/xlators/mgmt/glusterd/src/glusterd-messages.h
3604df
+++ b/xlators/mgmt/glusterd/src/glusterd-messages.h
3604df
@@ -41,7 +41,7 @@
3604df
 
3604df
 #define GLUSTERD_COMP_BASE      GLFS_MSGID_GLUSTERD
3604df
 
3604df
-#define GLFS_NUM_MESSAGES       578
3604df
+#define GLFS_NUM_MESSAGES       579
3604df
 
3604df
 #define GLFS_MSGID_END          (GLUSTERD_COMP_BASE + GLFS_NUM_MESSAGES + 1)
3604df
 /* Messaged with message IDs */
3604df
@@ -4673,6 +4673,14 @@
3604df
  */
3604df
 #define GD_MSG_DICT_GET_SUCCESS                   (GLUSTERD_COMP_BASE + 578)
3604df
 
3604df
+/*!
3604df
+ * @messageid
3604df
+ * @diagnosis
3604df
+ * @recommendedaction
3604df
+ *
3604df
+ */
3604df
+#define GD_MSG_PMAP_REGISTRY_REMOVE_FAIL          (GLUSTERD_COMP_BASE + 579)
3604df
+
3604df
 /*------------*/
3604df
 #define glfs_msg_end_x GLFS_MSGID_END, "Invalid: End of messages"
3604df
 #endif /* !_GLUSTERD_MESSAGES_H_ */
3604df
diff --git a/xlators/mgmt/glusterd/src/glusterd-pmap.c b/xlators/mgmt/glusterd/src/glusterd-pmap.c
3604df
index 3b9b227..377d206 100644
3604df
--- a/xlators/mgmt/glusterd/src/glusterd-pmap.c
3604df
+++ b/xlators/mgmt/glusterd/src/glusterd-pmap.c
3604df
@@ -203,6 +203,29 @@ pmap_registry_alloc (xlator_t *this)
3604df
         return port;
3604df
 }
3604df
 
3604df
+/* pmap_assign_port does a pmap_registry_remove followed by pmap_registry_alloc,
3604df
+ * the reason for the former is to ensure we don't end up with stale ports
3604df
+ */
3604df
+int
3604df
+pmap_assign_port (xlator_t *this, int old_port, const char *path)
3604df
+{
3604df
+        int ret = -1;
3604df
+        int new_port = 0;
3604df
+
3604df
+        if (old_port) {
3604df
+                ret = pmap_registry_remove (this, 0, path,
3604df
+                                            GF_PMAP_PORT_BRICKSERVER, NULL);
3604df
+                if (ret) {
3604df
+                        gf_msg (this->name, GF_LOG_WARNING,
3604df
+                                GD_MSG_PMAP_REGISTRY_REMOVE_FAIL, 0, "Failed toi"
3604df
+                                "remove pmap registry for older signin for path"
3604df
+                                " %s", path);
3604df
+                }
3604df
+        }
3604df
+        new_port = pmap_registry_alloc (this);
3604df
+        return new_port;
3604df
+}
3604df
+
3604df
 int
3604df
 pmap_registry_bind (xlator_t *this, int port, const char *brickname,
3604df
                     gf_pmap_port_type_t type, void *xprt)
3604df
@@ -452,7 +475,6 @@ __gluster_pmap_signout (rpcsvc_request_t *req)
3604df
                 req->rpc_err = GARBAGE_ARGS;
3604df
                 goto fail;
3604df
         }
3604df
-
3604df
         rsp.op_ret = pmap_registry_remove (THIS, args.port, args.brick,
3604df
                                            GF_PMAP_PORT_BRICKSERVER, req->trans);
3604df
 
3604df
diff --git a/xlators/mgmt/glusterd/src/glusterd-pmap.h b/xlators/mgmt/glusterd/src/glusterd-pmap.h
3604df
index 95ded04..14187da 100644
3604df
--- a/xlators/mgmt/glusterd/src/glusterd-pmap.h
3604df
+++ b/xlators/mgmt/glusterd/src/glusterd-pmap.h
3604df
@@ -35,6 +35,8 @@ struct pmap_registry {
3604df
         struct  pmap_port_status ports[65536];
3604df
 };
3604df
 
3604df
+int pmap_assign_port (xlator_t *this, int port, const char *path);
3604df
+int pmap_mark_port_leased (xlator_t *this, int port);
3604df
 int pmap_registry_alloc (xlator_t *this);
3604df
 int pmap_registry_bind (xlator_t *this, int port, const char *brickname,
3604df
                         gf_pmap_port_type_t type, void *xprt);
3604df
diff --git a/xlators/mgmt/glusterd/src/glusterd-snapd-svc.c b/xlators/mgmt/glusterd/src/glusterd-snapd-svc.c
3604df
index 830dc1a..36e4a19 100644
3604df
--- a/xlators/mgmt/glusterd/src/glusterd-snapd-svc.c
3604df
+++ b/xlators/mgmt/glusterd/src/glusterd-snapd-svc.c
3604df
@@ -295,28 +295,7 @@ glusterd_snapdsvc_start (glusterd_svc_t *svc, int flags)
3604df
                          "--brick-name", snapd_id,
3604df
                          "-S", svc->conn.sockpath, NULL);
3604df
 
3604df
-        /* Do a pmap registry remove on the older connected port */
3604df
-        if (volinfo->snapd.port) {
3604df
-                ret = pmap_registry_remove (this, volinfo->snapd.port,
3604df
-                                            snapd_id, GF_PMAP_PORT_BRICKSERVER,
3604df
-                                            NULL);
3604df
-                if (ret) {
3604df
-                        snprintf (msg, sizeof (msg), "Failed to remove pmap "
3604df
-                                  "registry for older signin");
3604df
-                        goto out;
3604df
-                }
3604df
-        }
3604df
-
3604df
-        snapd_port = pmap_registry_alloc (THIS);
3604df
-        if (!snapd_port) {
3604df
-                snprintf (msg, sizeof (msg), "Could not allocate port "
3604df
-                          "for snapd service for volume %s",
3604df
-                          volinfo->volname);
3604df
-                runner_log (&runner, this->name, GF_LOG_DEBUG, msg);
3604df
-                ret = -1;
3604df
-                goto out;
3604df
-        }
3604df
-
3604df
+        snapd_port = pmap_assign_port (THIS, volinfo->snapd.port, snapd_id);
3604df
         volinfo->snapd.port = snapd_port;
3604df
 
3604df
         runner_add_arg (&runner, "--brick-port");
3604df
diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c
3604df
index 46a3509..44c9284 100644
3604df
--- a/xlators/mgmt/glusterd/src/glusterd-utils.c
3604df
+++ b/xlators/mgmt/glusterd/src/glusterd-utils.c
3604df
@@ -1786,6 +1786,7 @@ glusterd_volume_start_glusterfs (glusterd_volinfo_t  *volinfo,
3604df
         char                    socketpath[PATH_MAX] = {0};
3604df
         char                    glusterd_uuid[1024] = {0,};
3604df
         char                    valgrind_logfile[PATH_MAX] = {0};
3604df
+        char                    rdma_brick_path[PATH_MAX] = {0,};
3604df
 
3604df
         GF_ASSERT (volinfo);
3604df
         GF_ASSERT (brickinfo);
3604df
@@ -1818,9 +1819,7 @@ glusterd_volume_start_glusterfs (glusterd_volinfo_t  *volinfo,
3604df
         if (gf_is_service_running (pidfile, NULL))
3604df
                 goto connect;
3604df
 
3604df
-        port = brickinfo->port;
3604df
-        if (!port)
3604df
-                port = pmap_registry_alloc (THIS);
3604df
+        port = pmap_assign_port (THIS, brickinfo->port, brickinfo->path);
3604df
 
3604df
         /* Build the exp_path, before starting the glusterfsd even in
3604df
            valgrind mode. Otherwise all the glusterfsd processes start
3604df
@@ -1885,9 +1884,10 @@ retry:
3604df
         if (volinfo->transport_type != GF_TRANSPORT_BOTH_TCP_RDMA) {
3604df
                 runner_argprintf (&runner, "%d", port);
3604df
         } else {
3604df
-                rdma_port = brickinfo->rdma_port;
3604df
-                if (!rdma_port)
3604df
-                        rdma_port = pmap_registry_alloc (THIS);
3604df
+                snprintf (rdma_brick_path, sizeof(rdma_brick_path), "%s.rdma",
3604df
+                          brickinfo->path);
3604df
+                rdma_port = pmap_assign_port (THIS, brickinfo->rdma_port,
3604df
+                                              rdma_brick_path);
3604df
                 runner_argprintf (&runner, "%d,%d", port, rdma_port);
3604df
                 runner_add_arg (&runner, "--xlator-option");
3604df
                 runner_argprintf (&runner, "%s-server.transport.rdma.listen-port=%d",
3604df
-- 
3604df
1.7.1
3604df