|
|
d1681e |
From 975e18d864b0b5c9158abae8752271e4a7fe6299 Mon Sep 17 00:00:00 2001
|
|
|
d1681e |
From: Atin Mukherjee <amukherj@redhat.com>
|
|
|
d1681e |
Date: Tue, 27 Mar 2018 16:53:33 +0530
|
|
|
d1681e |
Subject: [PATCH 213/236] glusterd: mark port_registered to true for all
|
|
|
d1681e |
running bricks with brick mux
|
|
|
d1681e |
|
|
|
d1681e |
glusterd maintains a boolean flag 'port_registered' which is used to determine
|
|
|
d1681e |
if a brick has completed its portmap sign in process. This flag is (re)set in
|
|
|
d1681e |
pmap_signin and pmap_signout events. In case of brick multiplexing this flag is
|
|
|
d1681e |
the identifier to determine if the very first brick with which the process is
|
|
|
d1681e |
spawned up has completed its sign in process. However in case of glusterd
|
|
|
d1681e |
restart when a brick is already identified as running, glusterd does a
|
|
|
d1681e |
pmap_registry_bind to ensure its portmap table is updated but this flag isn't
|
|
|
d1681e |
set, which is fine in the non-brick-multiplex case but causes an issue if
|
|
|
d1681e |
the very first brick which came as part of the process is replaced and then
|
|
|
d1681e |
the subsequent brick attach will fail. One way to validate this
|
|
|
d1681e |
is to create and start a volume, remove the first brick and then
|
|
|
d1681e |
add-brick a new one. Add-brick operation will take a very long time and
|
|
|
d1681e |
after that the volume status will show every brick apart from
|
|
|
d1681e |
the new brick as down.
|
|
|
d1681e |
|
|
|
d1681e |
The solution is to set brickinfo->port_registered to true for all the
|
|
|
d1681e |
running bricks when brick multiplexing is enabled.
|
|
|
d1681e |
|
|
|
d1681e |
>upstream mainline patch : https://review.gluster.org/#/c/19800/
|
|
|
d1681e |
|
|
|
d1681e |
>Change-Id: Ib0662d99d0fa66b1538947fd96b43f1cbc04e4ff
|
|
|
d1681e |
>Fixes: bz#1560957
|
|
|
d1681e |
>Signed-off-by: Atin Mukherjee <amukherj@redhat.com>
|
|
|
d1681e |
|
|
|
d1681e |
Change-Id: Ib0662d99d0fa66b1538947fd96b43f1cbc04e4ff
|
|
|
d1681e |
BUG: 1560955
|
|
|
d1681e |
Signed-off-by: Atin Mukherjee <amukherj@redhat.com>
|
|
|
d1681e |
Reviewed-on: https://code.engineering.redhat.com/gerrit/134827
|
|
|
d1681e |
Tested-by: RHGS Build Bot <nigelb@redhat.com>
|
|
|
d1681e |
Reviewed-by: Sanju Rakonde <srakonde@redhat.com>
|
|
|
d1681e |
---
|
|
|
d1681e |
.../bug-1560955-brick-mux-port-registered-issue.t | 39 ++++++++++++++++++++++
|
|
|
d1681e |
xlators/mgmt/glusterd/src/glusterd-handler.c | 2 ++
|
|
|
d1681e |
xlators/mgmt/glusterd/src/glusterd-utils.c | 1 +
|
|
|
d1681e |
3 files changed, 42 insertions(+)
|
|
|
d1681e |
create mode 100644 tests/bugs/glusterd/bug-1560955-brick-mux-port-registered-issue.t
|
|
|
d1681e |
|
|
|
d1681e |
diff --git a/tests/bugs/glusterd/bug-1560955-brick-mux-port-registered-issue.t b/tests/bugs/glusterd/bug-1560955-brick-mux-port-registered-issue.t
|
|
|
d1681e |
new file mode 100644
|
|
|
d1681e |
index 0000000..d1b8f06
|
|
|
d1681e |
--- /dev/null
|
|
|
d1681e |
+++ b/tests/bugs/glusterd/bug-1560955-brick-mux-port-registered-issue.t
|
|
|
d1681e |
@@ -0,0 +1,39 @@
|
|
|
d1681e |
+#!/bin/bash
|
|
|
d1681e |
+
|
|
|
d1681e |
+. $(dirname $0)/../../include.rc
|
|
|
d1681e |
+. $(dirname $0)/../../traps.rc
|
|
|
d1681e |
+. $(dirname $0)/../../volume.rc
|
|
|
d1681e |
+
|
|
|
d1681e |
+function count_brick_processes {
|
|
|
d1681e |
+ pgrep glusterfsd | wc -l
|
|
|
d1681e |
+}
|
|
|
d1681e |
+
|
|
|
d1681e |
+function count_brick_pids {
|
|
|
d1681e |
+ $CLI --xml volume status all | sed -n '/.*<pid>\([^<]*\).*/s//\1/p' \
|
|
|
d1681e |
+ | grep -v "N/A" | sort | uniq | wc -l
|
|
|
d1681e |
+}
|
|
|
d1681e |
+
|
|
|
d1681e |
+cleanup;
|
|
|
d1681e |
+
|
|
|
d1681e |
+#bug-1560955 - brick status goes offline after remove-brick followed by add-brick
|
|
|
d1681e |
+TEST glusterd
|
|
|
d1681e |
+TEST $CLI volume set all cluster.brick-multiplex on
|
|
|
d1681e |
+push_trapfunc "$CLI volume set all cluster.brick-multiplex off"
|
|
|
d1681e |
+push_trapfunc "cleanup"
|
|
|
d1681e |
+
|
|
|
d1681e |
+TEST $CLI volume create $V0 $H0:$B0/${V0}{1..3}
|
|
|
d1681e |
+TEST $CLI volume start $V0
|
|
|
d1681e |
+
|
|
|
d1681e |
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 1 count_brick_processes
|
|
|
d1681e |
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 1 count_brick_pids
|
|
|
d1681e |
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 3 online_brick_count
|
|
|
d1681e |
+
|
|
|
d1681e |
+
|
|
|
d1681e |
+pkill glusterd
|
|
|
d1681e |
+TEST glusterd
|
|
|
d1681e |
+TEST $CLI volume remove-brick $V0 $H0:$B0/${V0}1 force
|
|
|
d1681e |
+TEST $CLI volume add-brick $V0 $H0:$B0/${V0}1_new force
|
|
|
d1681e |
+
|
|
|
d1681e |
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 1 count_brick_processes
|
|
|
d1681e |
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 1 count_brick_pids
|
|
|
d1681e |
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 3 online_brick_count
|
|
|
d1681e |
diff --git a/xlators/mgmt/glusterd/src/glusterd-handler.c b/xlators/mgmt/glusterd/src/glusterd-handler.c
|
|
|
d1681e |
index dbf80a1..cb19321 100644
|
|
|
d1681e |
--- a/xlators/mgmt/glusterd/src/glusterd-handler.c
|
|
|
d1681e |
+++ b/xlators/mgmt/glusterd/src/glusterd-handler.c
|
|
|
d1681e |
@@ -5721,6 +5721,8 @@ glusterd_get_state (rpcsvc_request_t *req, dict_t *dict)
|
|
|
d1681e |
count, brickinfo->port);
|
|
|
d1681e |
fprintf (fp, "Volume%d.Brick%d.rdma_port: %d\n", count_bkp,
|
|
|
d1681e |
count, brickinfo->rdma_port);
|
|
|
d1681e |
+ fprintf (fp, "Volume%d.Brick%d.port_registered: %d\n",
|
|
|
d1681e |
+ count_bkp, count, brickinfo->port_registered);
|
|
|
d1681e |
fprintf (fp, "Volume%d.Brick%d.status: %s\n", count_bkp,
|
|
|
d1681e |
count, brickinfo->status ? "Started" : "Stopped");
|
|
|
d1681e |
|
|
|
d1681e |
diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c
|
|
|
d1681e |
index 49605cc..5e9213c 100644
|
|
|
d1681e |
--- a/xlators/mgmt/glusterd/src/glusterd-utils.c
|
|
|
d1681e |
+++ b/xlators/mgmt/glusterd/src/glusterd-utils.c
|
|
|
d1681e |
@@ -5976,6 +5976,7 @@ glusterd_brick_start (glusterd_volinfo_t *volinfo,
|
|
|
d1681e |
* TBD: re-use RPC connection across bricks
|
|
|
d1681e |
*/
|
|
|
d1681e |
if (is_brick_mx_enabled ()) {
|
|
|
d1681e |
+ brickinfo->port_registered = _gf_true;
|
|
|
d1681e |
ret = glusterd_get_sock_from_brick_pid (pid, socketpath,
|
|
|
d1681e |
sizeof(socketpath));
|
|
|
d1681e |
if (ret) {
|
|
|
d1681e |
--
|
|
|
d1681e |
1.8.3.1
|
|
|
d1681e |
|