17b94a
From 852c475040a599ed35798dbb388c6b59c1d0a820 Mon Sep 17 00:00:00 2001
17b94a
From: Sanju Rakonde <srakonde@redhat.com>
17b94a
Date: Tue, 22 Oct 2019 15:06:29 +0530
17b94a
Subject: [PATCH 323/335] cli: display detailed rebalance info
17b94a
17b94a
Problem: When one of the nodes in the cluster is down,
17b94a
rebalance status is not displaying detailed
17b94a
information.
17b94a
17b94a
Cause: In glusterd_volume_rebalance_use_rsp_dict()
17b94a
we are aggregating rsp from all the nodes into a
17b94a
dictionary and sending it to cli for printing. While
17b94a
assigning a index to keys we are considering all the
17b94a
peers instead of considering only the peers which are
17b94a
up. As a result, the index does not reach 1.
17b94a
While parsing the rsp, the cli is unable to find the status-1
17b94a
key in the dictionary and exits without printing
17b94a
any information.
17b94a
17b94a
Solution: The simplest fix for this without much
17b94a
code change is to continue to look for other keys
17b94a
when status-1 key is not found.
17b94a
17b94a
> upstream patch: https://review.gluster.org/#/c/glusterfs/+/23588
17b94a
> fixes: bz#1764119
17b94a
> Change-Id: I0062839933c9706119eb85416256eade97e976dc
17b94a
> Signed-off-by: Sanju Rakonde <srakonde@redhat.com>
17b94a
17b94a
BUG: 1761326
17b94a
Change-Id: I0062839933c9706119eb85416256eade97e976dc
17b94a
Signed-off-by: Sanju Rakonde <srakonde@redhat.com>
17b94a
Reviewed-on: https://code.engineering.redhat.com/gerrit/185749
17b94a
Tested-by: RHGS Build Bot <nigelb@redhat.com>
17b94a
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
17b94a
---
17b94a
 cli/src/cli-rpc-ops.c                      | 21 ++++++++++++++-------
17b94a
 tests/bugs/glusterd/rebalance-in-cluster.t |  9 +++++++++
17b94a
 2 files changed, 23 insertions(+), 7 deletions(-)
17b94a
17b94a
diff --git a/cli/src/cli-rpc-ops.c b/cli/src/cli-rpc-ops.c
17b94a
index b167e26..4e91265 100644
17b94a
--- a/cli/src/cli-rpc-ops.c
17b94a
+++ b/cli/src/cli-rpc-ops.c
17b94a
@@ -1597,13 +1597,20 @@ gf_cli_print_rebalance_status(dict_t *dict, enum gf_task_types task_type,
17b94a
         goto out;
17b94a
     }
17b94a
 
17b94a
-    snprintf(key, sizeof(key), "status-1");
17b94a
-
17b94a
-    ret = dict_get_int32(dict, key, (int32_t *)&status_rcd);
17b94a
-    if (ret) {
17b94a
-        gf_log("cli", GF_LOG_TRACE, "count %d %d", count, 1);
17b94a
-        gf_log("cli", GF_LOG_TRACE, "failed to get status");
17b94a
-        goto out;
17b94a
+    for (i = 1; i <= count; i++) {
17b94a
+        snprintf(key, sizeof(key), "status-%d", i);
17b94a
+        ret = dict_get_int32(dict, key, (int32_t *)&status_rcd);
17b94a
+        /* If information from a node is missing we should skip
17b94a
+         * the node and try to fetch information of other nodes.
17b94a
+         * If information is not found for all nodes, we should
17b94a
+         * error out.
17b94a
+         */
17b94a
+        if (!ret)
17b94a
+            break;
17b94a
+        if (ret && i == count) {
17b94a
+            gf_log("cli", GF_LOG_TRACE, "failed to get status");
17b94a
+            goto out;
17b94a
+        }
17b94a
     }
17b94a
 
17b94a
     /* Fix layout will be sent to all nodes for the volume
17b94a
diff --git a/tests/bugs/glusterd/rebalance-in-cluster.t b/tests/bugs/glusterd/rebalance-in-cluster.t
17b94a
index 9565fae..469ec6c 100644
17b94a
--- a/tests/bugs/glusterd/rebalance-in-cluster.t
17b94a
+++ b/tests/bugs/glusterd/rebalance-in-cluster.t
17b94a
@@ -4,6 +4,10 @@
17b94a
 . $(dirname $0)/../../cluster.rc
17b94a
 . $(dirname $0)/../../volume.rc
17b94a
 
17b94a
+function rebalance_status_field_1 {
17b94a
+        $CLI_1 volume rebalance $1 status | awk '{print $7}' | sed -n 3p
17b94a
+}
17b94a
+
17b94a
 cleanup;
17b94a
 TEST launch_cluster 2;
17b94a
 TEST $CLI_1 peer probe $H2;
17b94a
@@ -29,6 +33,11 @@ TEST $CLI_1 volume add-brick $V0 $H1:$B1/${V0}1 $H2:$B2/${V0}1
17b94a
 TEST $CLI_1 volume rebalance $V0  start
17b94a
 EXPECT_WITHIN $REBALANCE_TIMEOUT "completed" cluster_rebalance_status_field 1  $V0
17b94a
 
17b94a
+#bug - 1764119 - rebalance status should display detailed info when any of the nodes is down
17b94a
+TEST kill_glusterd 2
17b94a
+EXPECT_WITHIN $REBALANCE_TIMEOUT "completed" rebalance_status_field_1 $V0
17b94a
+
17b94a
+TEST start_glusterd 2
17b94a
 #bug-1245142
17b94a
 
17b94a
 $CLI_1 volume rebalance $V0  start &
17b94a
-- 
17b94a
1.8.3.1
17b94a