From 975e18d864b0b5c9158abae8752271e4a7fe6299 Mon Sep 17 00:00:00 2001 From: Atin Mukherjee Date: Tue, 27 Mar 2018 16:53:33 +0530 Subject: [PATCH 213/236] glusterd: mark port_registered to true for all running bricks with brick mux glusterd maintains a boolean flag 'port_registered' which is used to determine if a brick has completed its portmap sign in process. This flag is (re)set in pmap_signin and pmap_signout events. In case of brick multiplexing, this flag is the identifier to determine if the very first brick with which the process was spawned has completed its sign in process. However, on a glusterd restart, when a brick is already identified as running, glusterd does a pmap_registry_bind to ensure its portmap table is updated, but this flag is not set. That is fine in the non brick multiplex case, but it causes an issue if the very first brick which came as part of the process is replaced: the subsequent brick attach will then fail. One way to validate this is to create and start a volume, remove the first brick and then add-brick a new one. The add-brick operation will take a very long time, and after that the volume status will show all other brick status apart from the new brick as down. The solution is to set brickinfo->port_registered to true for all the running bricks when brick multiplexing is enabled. 
>upstream mainline patch : https://review.gluster.org/#/c/19800/ >Change-Id: Ib0662d99d0fa66b1538947fd96b43f1cbc04e4ff >Fixes: bz#1560957 >Signed-off-by: Atin Mukherjee Change-Id: Ib0662d99d0fa66b1538947fd96b43f1cbc04e4ff BUG: 1560955 Signed-off-by: Atin Mukherjee Reviewed-on: https://code.engineering.redhat.com/gerrit/134827 Tested-by: RHGS Build Bot Reviewed-by: Sanju Rakonde --- .../bug-1560955-brick-mux-port-registered-issue.t | 39 ++++++++++++++++++++++ xlators/mgmt/glusterd/src/glusterd-handler.c | 2 ++ xlators/mgmt/glusterd/src/glusterd-utils.c | 1 + 3 files changed, 42 insertions(+) create mode 100644 tests/bugs/glusterd/bug-1560955-brick-mux-port-registered-issue.t diff --git a/tests/bugs/glusterd/bug-1560955-brick-mux-port-registered-issue.t b/tests/bugs/glusterd/bug-1560955-brick-mux-port-registered-issue.t new file mode 100644 index 0000000..d1b8f06 --- /dev/null +++ b/tests/bugs/glusterd/bug-1560955-brick-mux-port-registered-issue.t @@ -0,0 +1,39 @@ +#!/bin/bash + +. $(dirname $0)/../../include.rc +. $(dirname $0)/../../traps.rc +. 
$(dirname $0)/../../volume.rc + +function count_brick_processes { + pgrep glusterfsd | wc -l +} + +function count_brick_pids { + $CLI --xml volume status all | sed -n '/.*\([^<]*\).*/s//\1/p' \ + | grep -v "N/A" | sort | uniq | wc -l +} + +cleanup; + +#bug-1560955 - brick status goes offline after remove-brick followed by add-brick +TEST glusterd +TEST $CLI volume set all cluster.brick-multiplex on +push_trapfunc "$CLI volume set all cluster.brick-multiplex off" +push_trapfunc "cleanup" + +TEST $CLI volume create $V0 $H0:$B0/${V0}{1..3} +TEST $CLI volume start $V0 + +EXPECT_WITHIN $PROCESS_UP_TIMEOUT 1 count_brick_processes +EXPECT_WITHIN $PROCESS_UP_TIMEOUT 1 count_brick_pids +EXPECT_WITHIN $PROCESS_UP_TIMEOUT 3 online_brick_count + + +pkill glusterd +TEST glusterd +TEST $CLI volume remove-brick $V0 $H0:$B0/${V0}1 force +TEST $CLI volume add-brick $V0 $H0:$B0/${V0}1_new force + +EXPECT_WITHIN $PROCESS_UP_TIMEOUT 1 count_brick_processes +EXPECT_WITHIN $PROCESS_UP_TIMEOUT 1 count_brick_pids +EXPECT_WITHIN $PROCESS_UP_TIMEOUT 3 online_brick_count diff --git a/xlators/mgmt/glusterd/src/glusterd-handler.c b/xlators/mgmt/glusterd/src/glusterd-handler.c index dbf80a1..cb19321 100644 --- a/xlators/mgmt/glusterd/src/glusterd-handler.c +++ b/xlators/mgmt/glusterd/src/glusterd-handler.c @@ -5721,6 +5721,8 @@ glusterd_get_state (rpcsvc_request_t *req, dict_t *dict) count, brickinfo->port); fprintf (fp, "Volume%d.Brick%d.rdma_port: %d\n", count_bkp, count, brickinfo->rdma_port); + fprintf (fp, "Volume%d.Brick%d.port_registered: %d\n", + count_bkp, count, brickinfo->port_registered); fprintf (fp, "Volume%d.Brick%d.status: %s\n", count_bkp, count, brickinfo->status ? 
"Started" : "Stopped"); diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c index 49605cc..5e9213c 100644 --- a/xlators/mgmt/glusterd/src/glusterd-utils.c +++ b/xlators/mgmt/glusterd/src/glusterd-utils.c @@ -5976,6 +5976,7 @@ glusterd_brick_start (glusterd_volinfo_t *volinfo, * TBD: re-use RPC connection across bricks */ if (is_brick_mx_enabled ()) { + brickinfo->port_registered = _gf_true; ret = glusterd_get_sock_from_brick_pid (pid, socketpath, sizeof(socketpath)); if (ret) { -- 1.8.3.1