cb8e9e
From 84007736637b8278858401d1988c1ea39df530d0 Mon Sep 17 00:00:00 2001
cb8e9e
From: Atin Mukherjee <amukherj@redhat.com>
cb8e9e
Date: Tue, 21 Jul 2015 09:57:43 +0530
cb8e9e
Subject: [PATCH 300/304] glusterd: Don't allow remove brick start/commit if glusterd is down of the host of the brick
cb8e9e
cb8e9e
Backport of http://review.gluster.org/#/c/11726/
cb8e9e
cb8e9e
remove brick stage blindly starts the remove brick operation even if the
cb8e9e
glusterd instance of the node hosting the brick is down. Operationally its
cb8e9e
incorrect and this could result into a inconsistent rebalance status across all
cb8e9e
the nodes as the originator of this command will always have the rebalance
cb8e9e
status to 'DEFRAG_NOT_STARTED', however when the glusterd instance on the other
cb8e9e
nodes comes up, will trigger rebalance and make the status to completed once the
cb8e9e
rebalance is finished.
cb8e9e
cb8e9e
This patch fixes two things:
cb8e9e
1. Add a validation in remove brick to check whether all the peers hosting the
cb8e9e
bricks to be removed are up.
cb8e9e
cb8e9e
2. Don't copy volinfo->rebal.dict from stale volinfo during restore as this
cb8e9e
might end up in a incosistent node_state.info file resulting into volume status
cb8e9e
command failure.
cb8e9e
cb8e9e
Change-Id: Ia4a76865c05037d49eec5e3bbfaf68c1567f1f81
cb8e9e
BUG: 1236038
cb8e9e
Signed-off-by: Atin Mukherjee <amukherj@redhat.com>
cb8e9e
Reviewed-on: http://review.gluster.org/11726
cb8e9e
Tested-by: NetBSD Build System <jenkins@build.gluster.org>
cb8e9e
Reviewed-by: N Balachandran <nbalacha@redhat.com>
cb8e9e
Reviewed-by: Krishnan Parthasarathi <kparthas@redhat.com>
cb8e9e
Reviewed-on: https://code.engineering.redhat.com/gerrit/56353
cb8e9e
Tested-by: Krishnan Parthasarathi <kparthas@redhat.com>
cb8e9e
---
cb8e9e
 .../glusterd/bug-1245045-remove-brick-validation.t |   54 +++++++++
cb8e9e
 xlators/mgmt/glusterd/src/glusterd-brick-ops.c     |  119 +++++++++++++++-----
cb8e9e
 xlators/mgmt/glusterd/src/glusterd-utils.c         |    1 -
cb8e9e
 3 files changed, 143 insertions(+), 31 deletions(-)
cb8e9e
 create mode 100644 tests/bugs/glusterd/bug-1245045-remove-brick-validation.t
cb8e9e
cb8e9e
diff --git a/tests/bugs/glusterd/bug-1245045-remove-brick-validation.t b/tests/bugs/glusterd/bug-1245045-remove-brick-validation.t
cb8e9e
new file mode 100644
cb8e9e
index 0000000..22a8d55
cb8e9e
--- /dev/null
cb8e9e
+++ b/tests/bugs/glusterd/bug-1245045-remove-brick-validation.t
cb8e9e
@@ -0,0 +1,54 @@
cb8e9e
+#!/bin/bash
cb8e9e
+
cb8e9e
+. $(dirname $0)/../../include.rc
cb8e9e
+. $(dirname $0)/../../cluster.rc
cb8e9e
+
cb8e9e
+cleanup
cb8e9e
+
cb8e9e
+TEST launch_cluster 3;
cb8e9e
+TEST $CLI_1 peer probe $H2;
cb8e9e
+TEST $CLI_1 peer probe $H3;
cb8e9e
+EXPECT_WITHIN $PROBE_TIMEOUT 2 peer_count
cb8e9e
+
cb8e9e
+TEST $CLI_1 volume create $V0 $H1:$B1/$V0 $H2:$B2/$V0
cb8e9e
+TEST $CLI_1 volume start $V0
cb8e9e
+
cb8e9e
+kill_glusterd 2
cb8e9e
+
cb8e9e
+#remove-brick should fail as the peer hosting the brick is down
cb8e9e
+TEST ! $CLI_1 volume remove-brick $V0 $H2:$B2/${V0} start
cb8e9e
+
cb8e9e
+TEST start_glusterd 2
cb8e9e
+
cb8e9e
+EXPECT_WITHIN $PROBE_TIMEOUT 2 peer_count
cb8e9e
+
cb8e9e
+#volume status should work
cb8e9e
+TEST $CLI_2 volume status
cb8e9e
+
cb8e9e
+
cb8e9e
+TEST $CLI_1 volume remove-brick $V0 $H2:$B2/${V0} start
cb8e9e
+kill_glusterd 2
cb8e9e
+
cb8e9e
+#remove-brick commit should fail as the peer hosting the brick is down
cb8e9e
+TEST ! $CLI_1 volume remove-brick $V0 $H2:$B2/${V0} commit
cb8e9e
+
cb8e9e
+TEST start_glusterd 2
cb8e9e
+
cb8e9e
+EXPECT_WITHIN $PROBE_TIMEOUT 2 peer_count
cb8e9e
+
cb8e9e
+#volume status should work
cb8e9e
+TEST $CLI_2 volume status
cb8e9e
+
cb8e9e
+TEST $CLI_1 volume remove-brick $V0 $H2:$B2/${V0} stop
cb8e9e
+
cb8e9e
+kill_glusterd 3
cb8e9e
+EXPECT_WITHIN $PROBE_TIMEOUT 1 peer_count
cb8e9e
+
cb8e9e
+TEST $CLI_1 volume remove-brick $V0 $H2:$B2/${V0} start
cb8e9e
+
cb8e9e
+TEST start_glusterd 3
cb8e9e
+EXPECT_WITHIN $PROBE_TIMEOUT 2 peer_count
cb8e9e
+
cb8e9e
+TEST $CLI_3 volume status
cb8e9e
+
cb8e9e
+cleanup
cb8e9e
diff --git a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c
cb8e9e
index caf51a4..e1a0397 100644
cb8e9e
--- a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c
cb8e9e
+++ b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c
cb8e9e
@@ -1661,6 +1661,83 @@ out:
cb8e9e
         return ret;
cb8e9e
 }
cb8e9e
 
cb8e9e
+static int
cb8e9e
+glusterd_remove_brick_validate_bricks (gf1_op_commands cmd, int32_t brick_count,
cb8e9e
+                                       dict_t *dict,
cb8e9e
+                                       glusterd_volinfo_t *volinfo,
cb8e9e
+                                       char **errstr)
cb8e9e
+{
cb8e9e
+        char                   *brick       = NULL;
cb8e9e
+        char                    msg[2048]   = {0,};
cb8e9e
+        char                    key[256]    = {0,};
cb8e9e
+        glusterd_brickinfo_t   *brickinfo   = NULL;
cb8e9e
+        glusterd_peerinfo_t    *peerinfo    = NULL;
cb8e9e
+        int                     i           = 0;
cb8e9e
+        int                     ret         = -1;
cb8e9e
+
cb8e9e
+        /* Check whether all the nodes of the bricks to be removed are
cb8e9e
+        * up, if not fail the operation */
cb8e9e
+        for (i = 1; i <= brick_count; i++) {
cb8e9e
+                snprintf (key, sizeof (key), "brick%d", i);
cb8e9e
+                ret = dict_get_str (dict, key, &brick);
cb8e9e
+                if (ret) {
cb8e9e
+                        snprintf (msg, sizeof (msg),
cb8e9e
+                                  "Unable to get %s", key);
cb8e9e
+                        *errstr = gf_strdup (msg);
cb8e9e
+                        goto out;
cb8e9e
+                }
cb8e9e
+
cb8e9e
+                ret =
cb8e9e
+                glusterd_volume_brickinfo_get_by_brick(brick, volinfo,
cb8e9e
+                                                       &brickinfo);
cb8e9e
+                if (ret) {
cb8e9e
+                        snprintf (msg, sizeof (msg), "Incorrect brick "
cb8e9e
+                                  "%s for volume %s", brick, volinfo->volname);
cb8e9e
+                        *errstr = gf_strdup (msg);
cb8e9e
+                        goto out;
cb8e9e
+                }
cb8e9e
+                /* Do not allow commit if the bricks are not decommissioned
cb8e9e
+                 * if its a remove brick commit
cb8e9e
+                 */
cb8e9e
+                if (cmd == GF_OP_CMD_COMMIT && !brickinfo->decommissioned) {
cb8e9e
+                        snprintf (msg, sizeof (msg), "Brick %s "
cb8e9e
+                                  "is not decommissioned. "
cb8e9e
+                                  "Use start or force option",
cb8e9e
+                                  brick);
cb8e9e
+                        *errstr = gf_strdup (msg);
cb8e9e
+                        ret = -1;
cb8e9e
+                        goto out;
cb8e9e
+                }
cb8e9e
+
cb8e9e
+                if (glusterd_is_local_brick (THIS, volinfo, brickinfo))
cb8e9e
+                        continue;
cb8e9e
+
cb8e9e
+                rcu_read_lock ();
cb8e9e
+                peerinfo = glusterd_peerinfo_find_by_uuid
cb8e9e
+                                                (brickinfo->uuid);
cb8e9e
+                if (!peerinfo) {
cb8e9e
+                        snprintf (msg, sizeof(msg), "Host node of the "
cb8e9e
+                                  "brick %s is not in cluster", brick);
cb8e9e
+                        *errstr = gf_strdup (msg);
cb8e9e
+                        ret = -1;
cb8e9e
+                        rcu_read_unlock ();
cb8e9e
+                        goto out;
cb8e9e
+                }
cb8e9e
+                if (!peerinfo->connected) {
cb8e9e
+                        snprintf (msg, sizeof(msg), "Host node of the "
cb8e9e
+                                  "brick %s is down", brick);
cb8e9e
+                        *errstr = gf_strdup (msg);
cb8e9e
+                        ret = -1;
cb8e9e
+                        rcu_read_unlock ();
cb8e9e
+                        goto out;
cb8e9e
+                }
cb8e9e
+                rcu_read_unlock ();
cb8e9e
+        }
cb8e9e
+
cb8e9e
+out:
cb8e9e
+        return ret;
cb8e9e
+}
cb8e9e
+
cb8e9e
 int
cb8e9e
 glusterd_op_stage_remove_brick (dict_t *dict, char **op_errstr)
cb8e9e
 {
cb8e9e
@@ -1679,6 +1756,7 @@ glusterd_op_stage_remove_brick (dict_t *dict, char **op_errstr)
cb8e9e
         char                   *brick       = NULL;
cb8e9e
         glusterd_brickinfo_t   *brickinfo   = NULL;
cb8e9e
         gsync_status_param_t    param       = {0,};
cb8e9e
+        glusterd_peerinfo_t    *peerinfo    = NULL;
cb8e9e
 
cb8e9e
         this = THIS;
cb8e9e
         GF_ASSERT (this);
cb8e9e
@@ -1817,6 +1895,12 @@ glusterd_op_stage_remove_brick (dict_t *dict, char **op_errstr)
cb8e9e
                         goto out;
cb8e9e
                 }
cb8e9e
 
cb8e9e
+                ret = glusterd_remove_brick_validate_bricks (cmd, brick_count,
cb8e9e
+                                                             dict, volinfo,
cb8e9e
+                                                             &errstr);
cb8e9e
+                if (ret)
cb8e9e
+                        goto out;
cb8e9e
+
cb8e9e
                 if (is_origin_glusterd (dict)) {
cb8e9e
                         ret = glusterd_generate_and_set_task_id
cb8e9e
                                 (dict, GF_REMOVE_BRICK_TID_KEY);
cb8e9e
@@ -1861,36 +1945,11 @@ glusterd_op_stage_remove_brick (dict_t *dict, char **op_errstr)
cb8e9e
                         goto out;
cb8e9e
                 }
cb8e9e
 
cb8e9e
-                /* Do not allow commit if the bricks are not decommissioned */
cb8e9e
-                for ( i = 1; i <= brick_count; i++ ) {
cb8e9e
-                        snprintf (key, sizeof (key), "brick%d", i);
cb8e9e
-                        ret = dict_get_str (dict, key, &brick);
cb8e9e
-                        if (ret) {
cb8e9e
-                                snprintf (msg, sizeof (msg),
cb8e9e
-                                          "Unable to get %s", key);
cb8e9e
-                                errstr = gf_strdup (msg);
cb8e9e
-                                goto out;
cb8e9e
-                        }
cb8e9e
-
cb8e9e
-                        ret =
cb8e9e
-                        glusterd_volume_brickinfo_get_by_brick(brick, volinfo,
cb8e9e
-                                                               &brickinfo);
cb8e9e
-                        if (ret) {
cb8e9e
-                                snprintf (msg, sizeof (msg), "Incorrect brick "
cb8e9e
-                                          "%s for volume %s", brick, volname);
cb8e9e
-                                errstr = gf_strdup (msg);
cb8e9e
-                                goto out;
cb8e9e
-                        }
cb8e9e
-                        if ( !brickinfo->decommissioned ) {
cb8e9e
-                                snprintf (msg, sizeof (msg), "Brick %s "
cb8e9e
-                                          "is not decommissioned. "
cb8e9e
-                                          "Use start or force option",
cb8e9e
-                                          brick);
cb8e9e
-                                errstr = gf_strdup (msg);
cb8e9e
-                                ret = -1;
cb8e9e
-                                goto out;
cb8e9e
-                        }
cb8e9e
-                }
cb8e9e
+                ret = glusterd_remove_brick_validate_bricks (cmd, brick_count,
cb8e9e
+                                                             dict, volinfo,
cb8e9e
+                                                             &errstr);
cb8e9e
+                if (ret)
cb8e9e
+                        goto out;
cb8e9e
 
cb8e9e
                 /* If geo-rep is configured, for this volume, it should be
cb8e9e
                  * stopped.
cb8e9e
diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c
cb8e9e
index 4982828..0f0ca27 100644
cb8e9e
--- a/xlators/mgmt/glusterd/src/glusterd-utils.c
cb8e9e
+++ b/xlators/mgmt/glusterd/src/glusterd-utils.c
cb8e9e
@@ -3739,7 +3739,6 @@ gd_check_and_update_rebalance_info (glusterd_volinfo_t *old_volinfo,
cb8e9e
         new->skipped_files      = old->skipped_files;
cb8e9e
         new->rebalance_failures = old->rebalance_failures;
cb8e9e
         new->rebalance_time     = old->rebalance_time;
cb8e9e
-        new->dict               = (old->dict ? dict_ref (old->dict) : NULL);
cb8e9e
 
cb8e9e
         /* glusterd_rebalance_t.{op, id, defrag_cmd} are copied during volume
cb8e9e
          * import
cb8e9e
-- 
cb8e9e
1.7.1
cb8e9e