7f4c2a
From 84007736637b8278858401d1988c1ea39df530d0 Mon Sep 17 00:00:00 2001
7f4c2a
From: Atin Mukherjee <amukherj@redhat.com>
7f4c2a
Date: Tue, 21 Jul 2015 09:57:43 +0530
7f4c2a
Subject: [PATCH 300/304] glusterd: Don't allow remove brick start/commit if glusterd is down of the host of the brick
7f4c2a
7f4c2a
Backport of http://review.gluster.org/#/c/11726/
7f4c2a
7f4c2a
remove brick stage blindly starts the remove brick operation even if the
7f4c2a
glusterd instance of the node hosting the brick is down. Operationally its
7f4c2a
incorrect and this could result into a inconsistent rebalance status across all
7f4c2a
the nodes as the originator of this command will always have the rebalance
7f4c2a
status to 'DEFRAG_NOT_STARTED', however when the glusterd instance on the other
7f4c2a
nodes comes up, will trigger rebalance and make the status to completed once the
7f4c2a
rebalance is finished.
7f4c2a
7f4c2a
This patch fixes two things:
7f4c2a
1. Add a validation in remove brick to check whether all the peers hosting the
7f4c2a
bricks to be removed are up.
7f4c2a
7f4c2a
2. Don't copy volinfo->rebal.dict from stale volinfo during restore as this
7f4c2a
might end up in a incosistent node_state.info file resulting into volume status
7f4c2a
command failure.
7f4c2a
7f4c2a
Change-Id: Ia4a76865c05037d49eec5e3bbfaf68c1567f1f81
7f4c2a
BUG: 1236038
7f4c2a
Signed-off-by: Atin Mukherjee <amukherj@redhat.com>
7f4c2a
Reviewed-on: http://review.gluster.org/11726
7f4c2a
Tested-by: NetBSD Build System <jenkins@build.gluster.org>
7f4c2a
Reviewed-by: N Balachandran <nbalacha@redhat.com>
7f4c2a
Reviewed-by: Krishnan Parthasarathi <kparthas@redhat.com>
7f4c2a
Reviewed-on: https://code.engineering.redhat.com/gerrit/56353
7f4c2a
Tested-by: Krishnan Parthasarathi <kparthas@redhat.com>
7f4c2a
---
7f4c2a
 .../glusterd/bug-1245045-remove-brick-validation.t |   54 +++++++++
7f4c2a
 xlators/mgmt/glusterd/src/glusterd-brick-ops.c     |  119 +++++++++++++++-----
7f4c2a
 xlators/mgmt/glusterd/src/glusterd-utils.c         |    1 -
7f4c2a
 3 files changed, 143 insertions(+), 31 deletions(-)
7f4c2a
 create mode 100644 tests/bugs/glusterd/bug-1245045-remove-brick-validation.t
7f4c2a
7f4c2a
diff --git a/tests/bugs/glusterd/bug-1245045-remove-brick-validation.t b/tests/bugs/glusterd/bug-1245045-remove-brick-validation.t
7f4c2a
new file mode 100644
7f4c2a
index 0000000..22a8d55
7f4c2a
--- /dev/null
7f4c2a
+++ b/tests/bugs/glusterd/bug-1245045-remove-brick-validation.t
7f4c2a
@@ -0,0 +1,54 @@
7f4c2a
+#!/bin/bash
7f4c2a
+
7f4c2a
+. $(dirname $0)/../../include.rc
7f4c2a
+. $(dirname $0)/../../cluster.rc
7f4c2a
+
7f4c2a
+cleanup
7f4c2a
+
7f4c2a
+TEST launch_cluster 3;
7f4c2a
+TEST $CLI_1 peer probe $H2;
7f4c2a
+TEST $CLI_1 peer probe $H3;
7f4c2a
+EXPECT_WITHIN $PROBE_TIMEOUT 2 peer_count
7f4c2a
+
7f4c2a
+TEST $CLI_1 volume create $V0 $H1:$B1/$V0 $H2:$B2/$V0
7f4c2a
+TEST $CLI_1 volume start $V0
7f4c2a
+
7f4c2a
+kill_glusterd 2
7f4c2a
+
7f4c2a
+#remove-brick should fail as the peer hosting the brick is down
7f4c2a
+TEST ! $CLI_1 volume remove-brick $V0 $H2:$B2/${V0} start
7f4c2a
+
7f4c2a
+TEST start_glusterd 2
7f4c2a
+
7f4c2a
+EXPECT_WITHIN $PROBE_TIMEOUT 2 peer_count
7f4c2a
+
7f4c2a
+#volume status should work
7f4c2a
+TEST $CLI_2 volume status
7f4c2a
+
7f4c2a
+
7f4c2a
+TEST $CLI_1 volume remove-brick $V0 $H2:$B2/${V0} start
7f4c2a
+kill_glusterd 2
7f4c2a
+
7f4c2a
+#remove-brick commit should fail as the peer hosting the brick is down
7f4c2a
+TEST ! $CLI_1 volume remove-brick $V0 $H2:$B2/${V0} commit
7f4c2a
+
7f4c2a
+TEST start_glusterd 2
7f4c2a
+
7f4c2a
+EXPECT_WITHIN $PROBE_TIMEOUT 2 peer_count
7f4c2a
+
7f4c2a
+#volume status should work
7f4c2a
+TEST $CLI_2 volume status
7f4c2a
+
7f4c2a
+TEST $CLI_1 volume remove-brick $V0 $H2:$B2/${V0} stop
7f4c2a
+
7f4c2a
+kill_glusterd 3
7f4c2a
+EXPECT_WITHIN $PROBE_TIMEOUT 1 peer_count
7f4c2a
+
7f4c2a
+TEST $CLI_1 volume remove-brick $V0 $H2:$B2/${V0} start
7f4c2a
+
7f4c2a
+TEST start_glusterd 3
7f4c2a
+EXPECT_WITHIN $PROBE_TIMEOUT 2 peer_count
7f4c2a
+
7f4c2a
+TEST $CLI_3 volume status
7f4c2a
+
7f4c2a
+cleanup
7f4c2a
diff --git a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c
7f4c2a
index caf51a4..e1a0397 100644
7f4c2a
--- a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c
7f4c2a
+++ b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c
7f4c2a
@@ -1661,6 +1661,83 @@ out:
7f4c2a
         return ret;
7f4c2a
 }
7f4c2a
 
7f4c2a
+static int
7f4c2a
+glusterd_remove_brick_validate_bricks (gf1_op_commands cmd, int32_t brick_count,
7f4c2a
+                                       dict_t *dict,
7f4c2a
+                                       glusterd_volinfo_t *volinfo,
7f4c2a
+                                       char **errstr)
7f4c2a
+{
7f4c2a
+        char                   *brick       = NULL;
7f4c2a
+        char                    msg[2048]   = {0,};
7f4c2a
+        char                    key[256]    = {0,};
7f4c2a
+        glusterd_brickinfo_t   *brickinfo   = NULL;
7f4c2a
+        glusterd_peerinfo_t    *peerinfo    = NULL;
7f4c2a
+        int                     i           = 0;
7f4c2a
+        int                     ret         = -1;
7f4c2a
+
7f4c2a
+        /* Check whether all the nodes of the bricks to be removed are
7f4c2a
+        * up, if not fail the operation */
7f4c2a
+        for (i = 1; i <= brick_count; i++) {
7f4c2a
+                snprintf (key, sizeof (key), "brick%d", i);
7f4c2a
+                ret = dict_get_str (dict, key, &brick);
7f4c2a
+                if (ret) {
7f4c2a
+                        snprintf (msg, sizeof (msg),
7f4c2a
+                                  "Unable to get %s", key);
7f4c2a
+                        *errstr = gf_strdup (msg);
7f4c2a
+                        goto out;
7f4c2a
+                }
7f4c2a
+
7f4c2a
+                ret =
7f4c2a
+                glusterd_volume_brickinfo_get_by_brick(brick, volinfo,
7f4c2a
+                                                       &brickinfo);
7f4c2a
+                if (ret) {
7f4c2a
+                        snprintf (msg, sizeof (msg), "Incorrect brick "
7f4c2a
+                                  "%s for volume %s", brick, volinfo->volname);
7f4c2a
+                        *errstr = gf_strdup (msg);
7f4c2a
+                        goto out;
7f4c2a
+                }
7f4c2a
+                /* Do not allow commit if the bricks are not decommissioned
7f4c2a
+                 * if its a remove brick commit
7f4c2a
+                 */
7f4c2a
+                if (cmd == GF_OP_CMD_COMMIT && !brickinfo->decommissioned) {
7f4c2a
+                        snprintf (msg, sizeof (msg), "Brick %s "
7f4c2a
+                                  "is not decommissioned. "
7f4c2a
+                                  "Use start or force option",
7f4c2a
+                                  brick);
7f4c2a
+                        *errstr = gf_strdup (msg);
7f4c2a
+                        ret = -1;
7f4c2a
+                        goto out;
7f4c2a
+                }
7f4c2a
+
7f4c2a
+                if (glusterd_is_local_brick (THIS, volinfo, brickinfo))
7f4c2a
+                        continue;
7f4c2a
+
7f4c2a
+                rcu_read_lock ();
7f4c2a
+                peerinfo = glusterd_peerinfo_find_by_uuid
7f4c2a
+                                                (brickinfo->uuid);
7f4c2a
+                if (!peerinfo) {
7f4c2a
+                        snprintf (msg, sizeof(msg), "Host node of the "
7f4c2a
+                                  "brick %s is not in cluster", brick);
7f4c2a
+                        *errstr = gf_strdup (msg);
7f4c2a
+                        ret = -1;
7f4c2a
+                        rcu_read_unlock ();
7f4c2a
+                        goto out;
7f4c2a
+                }
7f4c2a
+                if (!peerinfo->connected) {
7f4c2a
+                        snprintf (msg, sizeof(msg), "Host node of the "
7f4c2a
+                                  "brick %s is down", brick);
7f4c2a
+                        *errstr = gf_strdup (msg);
7f4c2a
+                        ret = -1;
7f4c2a
+                        rcu_read_unlock ();
7f4c2a
+                        goto out;
7f4c2a
+                }
7f4c2a
+                rcu_read_unlock ();
7f4c2a
+        }
7f4c2a
+
7f4c2a
+out:
7f4c2a
+        return ret;
7f4c2a
+}
7f4c2a
+
7f4c2a
 int
7f4c2a
 glusterd_op_stage_remove_brick (dict_t *dict, char **op_errstr)
7f4c2a
 {
7f4c2a
@@ -1679,6 +1756,7 @@ glusterd_op_stage_remove_brick (dict_t *dict, char **op_errstr)
7f4c2a
         char                   *brick       = NULL;
7f4c2a
         glusterd_brickinfo_t   *brickinfo   = NULL;
7f4c2a
         gsync_status_param_t    param       = {0,};
7f4c2a
+        glusterd_peerinfo_t    *peerinfo    = NULL;
7f4c2a
 
7f4c2a
         this = THIS;
7f4c2a
         GF_ASSERT (this);
7f4c2a
@@ -1817,6 +1895,12 @@ glusterd_op_stage_remove_brick (dict_t *dict, char **op_errstr)
7f4c2a
                         goto out;
7f4c2a
                 }
7f4c2a
 
7f4c2a
+                ret = glusterd_remove_brick_validate_bricks (cmd, brick_count,
7f4c2a
+                                                             dict, volinfo,
7f4c2a
+                                                             &errstr);
7f4c2a
+                if (ret)
7f4c2a
+                        goto out;
7f4c2a
+
7f4c2a
                 if (is_origin_glusterd (dict)) {
7f4c2a
                         ret = glusterd_generate_and_set_task_id
7f4c2a
                                 (dict, GF_REMOVE_BRICK_TID_KEY);
7f4c2a
@@ -1861,36 +1945,11 @@ glusterd_op_stage_remove_brick (dict_t *dict, char **op_errstr)
7f4c2a
                         goto out;
7f4c2a
                 }
7f4c2a
 
7f4c2a
-                /* Do not allow commit if the bricks are not decommissioned */
7f4c2a
-                for ( i = 1; i <= brick_count; i++ ) {
7f4c2a
-                        snprintf (key, sizeof (key), "brick%d", i);
7f4c2a
-                        ret = dict_get_str (dict, key, &brick);
7f4c2a
-                        if (ret) {
7f4c2a
-                                snprintf (msg, sizeof (msg),
7f4c2a
-                                          "Unable to get %s", key);
7f4c2a
-                                errstr = gf_strdup (msg);
7f4c2a
-                                goto out;
7f4c2a
-                        }
7f4c2a
-
7f4c2a
-                        ret =
7f4c2a
-                        glusterd_volume_brickinfo_get_by_brick(brick, volinfo,
7f4c2a
-                                                               &brickinfo);
7f4c2a
-                        if (ret) {
7f4c2a
-                                snprintf (msg, sizeof (msg), "Incorrect brick "
7f4c2a
-                                          "%s for volume %s", brick, volname);
7f4c2a
-                                errstr = gf_strdup (msg);
7f4c2a
-                                goto out;
7f4c2a
-                        }
7f4c2a
-                        if ( !brickinfo->decommissioned ) {
7f4c2a
-                                snprintf (msg, sizeof (msg), "Brick %s "
7f4c2a
-                                          "is not decommissioned. "
7f4c2a
-                                          "Use start or force option",
7f4c2a
-                                          brick);
7f4c2a
-                                errstr = gf_strdup (msg);
7f4c2a
-                                ret = -1;
7f4c2a
-                                goto out;
7f4c2a
-                        }
7f4c2a
-                }
7f4c2a
+                ret = glusterd_remove_brick_validate_bricks (cmd, brick_count,
7f4c2a
+                                                             dict, volinfo,
7f4c2a
+                                                             &errstr);
7f4c2a
+                if (ret)
7f4c2a
+                        goto out;
7f4c2a
 
7f4c2a
                 /* If geo-rep is configured, for this volume, it should be
7f4c2a
                  * stopped.
7f4c2a
diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c
7f4c2a
index 4982828..0f0ca27 100644
7f4c2a
--- a/xlators/mgmt/glusterd/src/glusterd-utils.c
7f4c2a
+++ b/xlators/mgmt/glusterd/src/glusterd-utils.c
7f4c2a
@@ -3739,7 +3739,6 @@ gd_check_and_update_rebalance_info (glusterd_volinfo_t *old_volinfo,
7f4c2a
         new->skipped_files      = old->skipped_files;
7f4c2a
         new->rebalance_failures = old->rebalance_failures;
7f4c2a
         new->rebalance_time     = old->rebalance_time;
7f4c2a
-        new->dict               = (old->dict ? dict_ref (old->dict) : NULL);
7f4c2a
 
7f4c2a
         /* glusterd_rebalance_t.{op, id, defrag_cmd} are copied during volume
7f4c2a
          * import
7f4c2a
-- 
7f4c2a
1.7.1
7f4c2a