Blob Blame History Raw
From 6c3b21ce5bb76b35856a6c270eb65d11f869061f Mon Sep 17 00:00:00 2001
From: Sanju Rakonde <srakonde@redhat.com>
Date: Fri, 26 Jun 2020 12:10:31 +0530
Subject: [PATCH 484/511] glusterd: rebalance status displays stats as 0 after
 reboot

problem: while the rebalance is in progress, if a node is
rebooted rebalance v status shows the stats of this node as
0 once the node is back.

Reason: when the node is rebooted, once it is back
glusterd_volume_defrag_restart() starts the rebalance and
creates the rpc. but due to some race, rebalance process is
sending disconnect event, so rpc object is getting destroyed. As
the rpc object is null, request for fetching the latest stats is
not sent to rebalance process. and stats are shows as default values
which is 0.

Solution: When the rpc object null, we should create the rpc if the
rebalance process is up. so that request can be sent to rebalance
process using the rpc.

>fixes: #1339
>Change-Id: I1c7533fedd17dcaffc0f7a5a918c87356133a81c
>Signed-off-by: Sanju Rakonde <srakonde@redhat.com>
Upstream Patch : https://review.gluster.org/c/glusterfs/+/24641

BUG: 1832306
Change-Id: I1c7533fedd17dcaffc0f7a5a918c87356133a81c
Signed-off-by: Srijan Sivakumar <ssivakum@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/220369
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
---
 xlators/mgmt/glusterd/src/glusterd-syncop.c | 29 ++++++++++++++++++++---------
 1 file changed, 20 insertions(+), 9 deletions(-)

diff --git a/xlators/mgmt/glusterd/src/glusterd-syncop.c b/xlators/mgmt/glusterd/src/glusterd-syncop.c
index c78983a..df78fef 100644
--- a/xlators/mgmt/glusterd/src/glusterd-syncop.c
+++ b/xlators/mgmt/glusterd/src/glusterd-syncop.c
@@ -1693,6 +1693,7 @@ gd_brick_op_phase(glusterd_op_t op, dict_t *op_ctx, dict_t *req_dict,
     rpc_clnt_t *rpc = NULL;
     dict_t *rsp_dict = NULL;
     int32_t cmd = GF_OP_CMD_NONE;
+    glusterd_volinfo_t *volinfo = NULL;
 
     this = THIS;
     rsp_dict = dict_new();
@@ -1724,18 +1725,28 @@ gd_brick_op_phase(glusterd_op_t op, dict_t *op_ctx, dict_t *req_dict,
     cds_list_for_each_entry_safe(pending_node, tmp, &selected, list)
     {
         rpc = glusterd_pending_node_get_rpc(pending_node);
+        /* In the case of rebalance if the rpc object is null, we try to
+         * create the rpc object. if the rebalance daemon is down, it returns
+         * -1. otherwise, rpc object will be created and referenced.
+         */
         if (!rpc) {
-            if (pending_node->type == GD_NODE_REBALANCE) {
-                ret = 0;
-                glusterd_defrag_volume_node_rsp(req_dict, NULL, op_ctx);
+            if (pending_node->type == GD_NODE_REBALANCE && pending_node->node) {
+                volinfo = pending_node->node;
+                ret = glusterd_rebalance_rpc_create(volinfo);
+                if (ret) {
+                    ret = 0;
+                    glusterd_defrag_volume_node_rsp(req_dict, NULL, op_ctx);
+                    goto out;
+                } else {
+                    rpc = glusterd_defrag_rpc_get(volinfo->rebal.defrag);
+                }
+            } else {
+                ret = -1;
+                gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_RPC_FAILURE,
+                       "Brick Op failed "
+                       "due to rpc failure.");
                 goto out;
             }
-
-            ret = -1;
-            gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_RPC_FAILURE,
-                   "Brick Op failed "
-                   "due to rpc failure.");
-            goto out;
         }
 
         /* Redirect operation to be detach tier via rebalance flow. */
-- 
1.8.3.1