|
|
21ab4e |
From c1fb83040ecc324c503d93dfd800c5bdc677428c Mon Sep 17 00:00:00 2001
|
|
|
21ab4e |
From: Jeff Darcy <jdarcy@redhat.com>
|
|
|
21ab4e |
Date: Thu, 2 Feb 2017 13:08:04 -0500
|
|
|
21ab4e |
Subject: [PATCH 324/361] glusterd: double-check brick liveness for
|
|
|
21ab4e |
remove-brick validation
|
|
|
21ab4e |
|
|
|
21ab4e |
This is the same problem as https://review.gluster.org/#/c/16509/, in a different
|
|
|
21ab4e |
place. Tests detach bricks without glusterd's knowledge, so
|
|
|
21ab4e |
glusterd's internal brick state is out of date and we have to re-check
|
|
|
21ab4e |
(via the brick's pidfile) as well.
|
|
|
21ab4e |
|
|
|
21ab4e |
mainline:
|
|
|
21ab4e |
> BUG: 1385758
|
|
|
21ab4e |
> Reviewed-on: https://review.gluster.org/16529
|
|
|
21ab4e |
> Smoke: Gluster Build System <jenkins@build.gluster.org>
|
|
|
21ab4e |
> NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
|
|
|
21ab4e |
> CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
|
|
|
21ab4e |
> Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
|
|
|
21ab4e |
(cherry picked from commit 13cd11a91ec52af6a7cfcbd7e0c34f1c27904df6)
|
|
|
21ab4e |
|
|
|
21ab4e |
BUG: 1417815
|
|
|
21ab4e |
Change-Id: I169538c1c62d72a685a49d57ef65fb6c3db6eab2
|
|
|
21ab4e |
Signed-off-by: Jeff Darcy <jdarcy@redhat.com>
|
|
|
21ab4e |
Reviewed-on: https://code.engineering.redhat.com/gerrit/101305
|
|
|
21ab4e |
Tested-by: Milind Changire <mchangir@redhat.com>
|
|
|
21ab4e |
Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
|
|
|
21ab4e |
---
|
|
|
21ab4e |
...-1225716-brick-online-validation-remove-brick.t | 6 ++++--
|
|
|
21ab4e |
xlators/mgmt/glusterd/src/glusterd-brick-ops.c | 22 +++++++++++++++++++---
|
|
|
21ab4e |
2 files changed, 23 insertions(+), 5 deletions(-)
|
|
|
21ab4e |
|
|
|
21ab4e |
diff --git a/tests/bugs/glusterd/bug-1225716-brick-online-validation-remove-brick.t b/tests/bugs/glusterd/bug-1225716-brick-online-validation-remove-brick.t
|
|
|
21ab4e |
index eca1c1a..47403b4 100644
|
|
|
21ab4e |
--- a/tests/bugs/glusterd/bug-1225716-brick-online-validation-remove-brick.t
|
|
|
21ab4e |
+++ b/tests/bugs/glusterd/bug-1225716-brick-online-validation-remove-brick.t
|
|
|
21ab4e |
@@ -12,7 +12,8 @@ TEST $CLI volume create $V0 $H0:$B0/${V0}0 $H0:$B0/${V0}1 $H0:$B0/${V0}2
|
|
|
21ab4e |
TEST $CLI volume start $V0
|
|
|
21ab4e |
|
|
|
21ab4e |
#kill a brick process
|
|
|
21ab4e |
-kill -15 `cat $GLUSTERD_WORKDIR/vols/$V0/run/$H0-d-backends-${V0}1.pid`;
|
|
|
21ab4e |
+kill_brick $V0 $H0 $B0/${V0}1
|
|
|
21ab4e |
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" brick_up_status $V0 $H0 $B0/${V0}1
|
|
|
21ab4e |
|
|
|
21ab4e |
#remove-brick start should fail as the brick is down
|
|
|
21ab4e |
TEST ! $CLI volume remove-brick $V0 $H0:$B0/${V0}1 start
|
|
|
21ab4e |
@@ -26,7 +27,8 @@ TEST $CLI volume remove-brick $V0 $H0:$B0/${V0}1 start
|
|
|
21ab4e |
EXPECT_WITHIN $REBALANCE_TIMEOUT "completed" remove_brick_status_completed_field "$V0 $H0:$B0/${V0}1"
|
|
|
21ab4e |
|
|
|
21ab4e |
#kill a brick process
|
|
|
21ab4e |
-kill -15 `cat $GLUSTERD_WORKDIR/vols/$V0/run/$H0-d-backends-${V0}1.pid`;
|
|
|
21ab4e |
+kill_brick $V0 $H0 $B0/${V0}1
|
|
|
21ab4e |
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" brick_up_status $V0 $H0 $B0/${V0}1
|
|
|
21ab4e |
|
|
|
21ab4e |
#remove-brick commit should pass even if the brick is down
|
|
|
21ab4e |
TEST $CLI volume remove-brick $V0 $H0:$B0/${V0}1 commit
|
|
|
21ab4e |
diff --git a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c
|
|
|
21ab4e |
index b22a7da..e12d314 100644
|
|
|
21ab4e |
--- a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c
|
|
|
21ab4e |
+++ b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c
|
|
|
21ab4e |
@@ -1947,6 +1947,8 @@ glusterd_remove_brick_validate_bricks (gf1_op_commands cmd, int32_t brick_count,
|
|
|
21ab4e |
glusterd_peerinfo_t *peerinfo = NULL;
|
|
|
21ab4e |
int i = 0;
|
|
|
21ab4e |
int ret = -1;
|
|
|
21ab4e |
+ char pidfile[PATH_MAX+1] = {0,};
|
|
|
21ab4e |
+ glusterd_conf_t *priv = THIS->private;
|
|
|
21ab4e |
|
|
|
21ab4e |
/* Check whether all the nodes of the bricks to be removed are
|
|
|
21ab4e |
* up, if not fail the operation */
|
|
|
21ab4e |
@@ -1996,15 +1998,29 @@ glusterd_remove_brick_validate_bricks (gf1_op_commands cmd, int32_t brick_count,
|
|
|
21ab4e |
}
|
|
|
21ab4e |
|
|
|
21ab4e |
if (glusterd_is_local_brick (THIS, volinfo, brickinfo)) {
|
|
|
21ab4e |
- if (((cmd == GF_OP_CMD_START) ||
|
|
|
21ab4e |
- (cmd == GF_OP_CMD_DETACH_START)) &&
|
|
|
21ab4e |
- brickinfo->status != GF_BRICK_STARTED) {
|
|
|
21ab4e |
+ switch (cmd) {
|
|
|
21ab4e |
+ case GF_OP_CMD_START:
|
|
|
21ab4e |
+ case GF_OP_CMD_DETACH_START:
|
|
|
21ab4e |
+ break;
|
|
|
21ab4e |
+ default:
|
|
|
21ab4e |
+ continue;
|
|
|
21ab4e |
+ }
|
|
|
21ab4e |
+ if (brickinfo->status != GF_BRICK_STARTED) {
|
|
|
21ab4e |
snprintf (msg, sizeof (msg), "Found stopped "
|
|
|
21ab4e |
"brick %s", brick);
|
|
|
21ab4e |
*errstr = gf_strdup (msg);
|
|
|
21ab4e |
ret = -1;
|
|
|
21ab4e |
goto out;
|
|
|
21ab4e |
}
|
|
|
21ab4e |
+ GLUSTERD_GET_BRICK_PIDFILE (pidfile, volinfo,
|
|
|
21ab4e |
+ brickinfo, priv);
|
|
|
21ab4e |
+ if (!gf_is_service_running (pidfile, NULL)) {
|
|
|
21ab4e |
+ snprintf (msg, sizeof (msg), "Found dead "
|
|
|
21ab4e |
+ "brick %s", brick);
|
|
|
21ab4e |
+ *errstr = gf_strdup (msg);
|
|
|
21ab4e |
+ ret = -1;
|
|
|
21ab4e |
+ goto out;
|
|
|
21ab4e |
+ }
|
|
|
21ab4e |
continue;
|
|
|
21ab4e |
}
|
|
|
21ab4e |
|
|
|
21ab4e |
--
|
|
|
21ab4e |
1.8.3.1
|
|
|
21ab4e |
|