b51a1f
From 6c3b21ce5bb76b35856a6c270eb65d11f869061f Mon Sep 17 00:00:00 2001
b51a1f
From: Sanju Rakonde <srakonde@redhat.com>
b51a1f
Date: Fri, 26 Jun 2020 12:10:31 +0530
b51a1f
Subject: [PATCH 484/511] glusterd: rebalance status displays stats as 0 after
b51a1f
 reboot
b51a1f
b51a1f
problem: while the rebalance is in progress, if a node is
b51a1f
rebooted rebalance v status shows the stats of this node as
b51a1f
0 once the node is back.
b51a1f
b51a1f
Reason: when the node is rebooted, once it is back
b51a1f
glusterd_volume_defrag_restart() starts the rebalance and
b51a1f
creates the rpc. but due to some race, rebalance process is
b51a1f
sending disconnect event, so rpc object is getting destroyed. As
b51a1f
the rpc object is null, request for fetching the latest stats is
b51a1f
not sent to rebalance process, and stats are shown as default values
b51a1f
which is 0.
b51a1f
b51a1f
Solution: When the rpc object is null, we should create the rpc if the
b51a1f
rebalance process is up. so that request can be sent to rebalance
b51a1f
process using the rpc.
b51a1f
b51a1f
>fixes: #1339
b51a1f
>Change-Id: I1c7533fedd17dcaffc0f7a5a918c87356133a81c
b51a1f
>Signed-off-by: Sanju Rakonde <srakonde@redhat.com>
b51a1f
Upstream Patch : https://review.gluster.org/c/glusterfs/+/24641
b51a1f
b51a1f
BUG: 1832306
b51a1f
Change-Id: I1c7533fedd17dcaffc0f7a5a918c87356133a81c
b51a1f
Signed-off-by: Srijan Sivakumar <ssivakum@redhat.com>
b51a1f
Reviewed-on: https://code.engineering.redhat.com/gerrit/220369
b51a1f
Tested-by: RHGS Build Bot <nigelb@redhat.com>
b51a1f
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
b51a1f
---
b51a1f
 xlators/mgmt/glusterd/src/glusterd-syncop.c | 29 ++++++++++++++++++++---------
b51a1f
 1 file changed, 20 insertions(+), 9 deletions(-)
b51a1f
b51a1f
diff --git a/xlators/mgmt/glusterd/src/glusterd-syncop.c b/xlators/mgmt/glusterd/src/glusterd-syncop.c
b51a1f
index c78983a..df78fef 100644
b51a1f
--- a/xlators/mgmt/glusterd/src/glusterd-syncop.c
b51a1f
+++ b/xlators/mgmt/glusterd/src/glusterd-syncop.c
b51a1f
@@ -1693,6 +1693,7 @@ gd_brick_op_phase(glusterd_op_t op, dict_t *op_ctx, dict_t *req_dict,
b51a1f
     rpc_clnt_t *rpc = NULL;
b51a1f
     dict_t *rsp_dict = NULL;
b51a1f
     int32_t cmd = GF_OP_CMD_NONE;
b51a1f
+    glusterd_volinfo_t *volinfo = NULL;
b51a1f
 
b51a1f
     this = THIS;
b51a1f
     rsp_dict = dict_new();
b51a1f
@@ -1724,18 +1725,28 @@ gd_brick_op_phase(glusterd_op_t op, dict_t *op_ctx, dict_t *req_dict,
b51a1f
     cds_list_for_each_entry_safe(pending_node, tmp, &selected, list)
b51a1f
     {
b51a1f
         rpc = glusterd_pending_node_get_rpc(pending_node);
b51a1f
+        /* In the case of rebalance if the rpc object is null, we try to
b51a1f
+         * create the rpc object. if the rebalance daemon is down, it returns
b51a1f
+         * -1. otherwise, rpc object will be created and referenced.
b51a1f
+         */
b51a1f
         if (!rpc) {
b51a1f
-            if (pending_node->type == GD_NODE_REBALANCE) {
b51a1f
-                ret = 0;
b51a1f
-                glusterd_defrag_volume_node_rsp(req_dict, NULL, op_ctx);
b51a1f
+            if (pending_node->type == GD_NODE_REBALANCE && pending_node->node) {
b51a1f
+                volinfo = pending_node->node;
b51a1f
+                ret = glusterd_rebalance_rpc_create(volinfo);
b51a1f
+                if (ret) {
b51a1f
+                    ret = 0;
b51a1f
+                    glusterd_defrag_volume_node_rsp(req_dict, NULL, op_ctx);
b51a1f
+                    goto out;
b51a1f
+                } else {
b51a1f
+                    rpc = glusterd_defrag_rpc_get(volinfo->rebal.defrag);
b51a1f
+                }
b51a1f
+            } else {
b51a1f
+                ret = -1;
b51a1f
+                gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_RPC_FAILURE,
b51a1f
+                       "Brick Op failed "
b51a1f
+                       "due to rpc failure.");
b51a1f
                 goto out;
b51a1f
             }
b51a1f
-
b51a1f
-            ret = -1;
b51a1f
-            gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_RPC_FAILURE,
b51a1f
-                   "Brick Op failed "
b51a1f
-                   "due to rpc failure.");
b51a1f
-            goto out;
b51a1f
         }
b51a1f
 
b51a1f
         /* Redirect operation to be detach tier via rebalance flow. */
b51a1f
-- 
b51a1f
1.8.3.1
b51a1f