From 84007736637b8278858401d1988c1ea39df530d0 Mon Sep 17 00:00:00 2001 From: Atin Mukherjee Date: Tue, 21 Jul 2015 09:57:43 +0530 Subject: [PATCH 300/304] glusterd: Don't allow remove brick start/commit if glusterd is down of the host of the brick Backport of http://review.gluster.org/#/c/11726/ remove brick stage blindly starts the remove brick operation even if the glusterd instance of the node hosting the brick is down. Operationally its incorrect and this could result into a inconsistent rebalance status across all the nodes as the originator of this command will always have the rebalance status to 'DEFRAG_NOT_STARTED', however when the glusterd instance on the other nodes comes up, will trigger rebalance and make the status to completed once the rebalance is finished. This patch fixes two things: 1. Add a validation in remove brick to check whether all the peers hosting the bricks to be removed are up. 2. Don't copy volinfo->rebal.dict from stale volinfo during restore as this might end up in a incosistent node_state.info file resulting into volume status command failure. Change-Id: Ia4a76865c05037d49eec5e3bbfaf68c1567f1f81 BUG: 1236038 Signed-off-by: Atin Mukherjee Reviewed-on: http://review.gluster.org/11726 Tested-by: NetBSD Build System Reviewed-by: N Balachandran Reviewed-by: Krishnan Parthasarathi Reviewed-on: https://code.engineering.redhat.com/gerrit/56353 Tested-by: Krishnan Parthasarathi --- .../glusterd/bug-1245045-remove-brick-validation.t | 54 +++++++++ xlators/mgmt/glusterd/src/glusterd-brick-ops.c | 119 +++++++++++++++----- xlators/mgmt/glusterd/src/glusterd-utils.c | 1 - 3 files changed, 143 insertions(+), 31 deletions(-) create mode 100644 tests/bugs/glusterd/bug-1245045-remove-brick-validation.t diff --git a/tests/bugs/glusterd/bug-1245045-remove-brick-validation.t b/tests/bugs/glusterd/bug-1245045-remove-brick-validation.t new file mode 100644 index 0000000..22a8d55 --- /dev/null +++ b/tests/bugs/glusterd/bug-1245045-remove-brick-validation.t @@ -0,0 +1,54 @@ +#!/bin/bash + +. $(dirname $0)/../../include.rc +. $(dirname $0)/../../cluster.rc + +cleanup + +TEST launch_cluster 3; +TEST $CLI_1 peer probe $H2; +TEST $CLI_1 peer probe $H3; +EXPECT_WITHIN $PROBE_TIMEOUT 2 peer_count + +TEST $CLI_1 volume create $V0 $H1:$B1/$V0 $H2:$B2/$V0 +TEST $CLI_1 volume start $V0 + +kill_glusterd 2 + +#remove-brick should fail as the peer hosting the brick is down +TEST ! $CLI_1 volume remove-brick $V0 $H2:$B2/${V0} start + +TEST start_glusterd 2 + +EXPECT_WITHIN $PROBE_TIMEOUT 2 peer_count + +#volume status should work +TEST $CLI_2 volume status + + +TEST $CLI_1 volume remove-brick $V0 $H2:$B2/${V0} start +kill_glusterd 2 + +#remove-brick commit should fail as the peer hosting the brick is down +TEST ! $CLI_1 volume remove-brick $V0 $H2:$B2/${V0} commit + +TEST start_glusterd 2 + +EXPECT_WITHIN $PROBE_TIMEOUT 2 peer_count + +#volume status should work +TEST $CLI_2 volume status + +TEST $CLI_1 volume remove-brick $V0 $H2:$B2/${V0} stop + +kill_glusterd 3 +EXPECT_WITHIN $PROBE_TIMEOUT 1 peer_count + +TEST $CLI_1 volume remove-brick $V0 $H2:$B2/${V0} start + +TEST start_glusterd 3 +EXPECT_WITHIN $PROBE_TIMEOUT 2 peer_count + +TEST $CLI_3 volume status + +cleanup diff --git a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c index caf51a4..e1a0397 100644 --- a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c +++ b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c @@ -1661,6 +1661,83 @@ out: return ret; } +static int +glusterd_remove_brick_validate_bricks (gf1_op_commands cmd, int32_t brick_count, + dict_t *dict, + glusterd_volinfo_t *volinfo, + char **errstr) +{ + char *brick = NULL; + char msg[2048] = {0,}; + char key[256] = {0,}; + glusterd_brickinfo_t *brickinfo = NULL; + glusterd_peerinfo_t *peerinfo = NULL; + int i = 0; + int ret = -1; + + /* Check whether all the nodes of the bricks to be removed are + * up, if not fail the operation */ + for (i = 1; i <= brick_count; i++) { + snprintf (key, sizeof (key), "brick%d", i); + ret = dict_get_str (dict, key, &brick); + if (ret) { + snprintf (msg, sizeof (msg), + "Unable to get %s", key); + *errstr = gf_strdup (msg); + goto out; + } + + ret = + glusterd_volume_brickinfo_get_by_brick(brick, volinfo, + &brickinfo); + if (ret) { + snprintf (msg, sizeof (msg), "Incorrect brick " + "%s for volume %s", brick, volinfo->volname); + *errstr = gf_strdup (msg); + goto out; + } + /* Do not allow commit if the bricks are not decommissioned + * if its a remove brick commit + */ + if (cmd == GF_OP_CMD_COMMIT && !brickinfo->decommissioned) { + snprintf (msg, sizeof (msg), "Brick %s " + "is not decommissioned. " + "Use start or force option", + brick); + *errstr = gf_strdup (msg); + ret = -1; + goto out; + } + + if (glusterd_is_local_brick (THIS, volinfo, brickinfo)) + continue; + + rcu_read_lock (); + peerinfo = glusterd_peerinfo_find_by_uuid + (brickinfo->uuid); + if (!peerinfo) { + snprintf (msg, sizeof(msg), "Host node of the " + "brick %s is not in cluster", brick); + *errstr = gf_strdup (msg); + ret = -1; + rcu_read_unlock (); + goto out; + } + if (!peerinfo->connected) { + snprintf (msg, sizeof(msg), "Host node of the " + "brick %s is down", brick); + *errstr = gf_strdup (msg); + ret = -1; + rcu_read_unlock (); + goto out; + } + rcu_read_unlock (); + } + +out: + return ret; +} + int glusterd_op_stage_remove_brick (dict_t *dict, char **op_errstr) { @@ -1679,6 +1756,7 @@ glusterd_op_stage_remove_brick (dict_t *dict, char **op_errstr) char *brick = NULL; glusterd_brickinfo_t *brickinfo = NULL; gsync_status_param_t param = {0,}; + glusterd_peerinfo_t *peerinfo = NULL; this = THIS; GF_ASSERT (this); @@ -1817,6 +1895,12 @@ glusterd_op_stage_remove_brick (dict_t *dict, char **op_errstr) goto out; } + ret = glusterd_remove_brick_validate_bricks (cmd, brick_count, + dict, volinfo, + &errstr); + if (ret) + goto out; + if (is_origin_glusterd (dict)) { ret = glusterd_generate_and_set_task_id (dict, GF_REMOVE_BRICK_TID_KEY); @@ -1861,36 +1945,11 @@ glusterd_op_stage_remove_brick (dict_t *dict, char **op_errstr) goto out; } - /* Do not allow commit if the bricks are not decommissioned */ - for ( i = 1; i <= brick_count; i++ ) { - snprintf (key, sizeof (key), "brick%d", i); - ret = dict_get_str (dict, key, &brick); - if (ret) { - snprintf (msg, sizeof (msg), - "Unable to get %s", key); - errstr = gf_strdup (msg); - goto out; - } - - ret = - glusterd_volume_brickinfo_get_by_brick(brick, volinfo, - &brickinfo); - if (ret) { - snprintf (msg, sizeof (msg), "Incorrect brick " - "%s for volume %s", brick, volname); - errstr = gf_strdup (msg); - goto out; - } - if ( !brickinfo->decommissioned ) { - snprintf (msg, sizeof (msg), "Brick %s " - "is not decommissioned. " - "Use start or force option", - brick); - errstr = gf_strdup (msg); - ret = -1; - goto out; - } - } + ret = glusterd_remove_brick_validate_bricks (cmd, brick_count, + dict, volinfo, + &errstr); + if (ret) + goto out; /* If geo-rep is configured, for this volume, it should be * stopped. diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c index 4982828..0f0ca27 100644 --- a/xlators/mgmt/glusterd/src/glusterd-utils.c +++ b/xlators/mgmt/glusterd/src/glusterd-utils.c @@ -3739,7 +3739,6 @@ gd_check_and_update_rebalance_info (glusterd_volinfo_t *old_volinfo, new->skipped_files = old->skipped_files; new->rebalance_failures = old->rebalance_failures; new->rebalance_time = old->rebalance_time; - new->dict = (old->dict ? dict_ref (old->dict) : NULL); /* glusterd_rebalance_t.{op, id, defrag_cmd} are copied during volume * import -- 1.7.1