b7d4d7
From 6c3b21ce5bb76b35856a6c270eb65d11f869061f Mon Sep 17 00:00:00 2001
b7d4d7
From: Sanju Rakonde <srakonde@redhat.com>
b7d4d7
Date: Fri, 26 Jun 2020 12:10:31 +0530
b7d4d7
Subject: [PATCH 484/511] glusterd: rebalance status displays stats as 0 after
b7d4d7
 reboot
b7d4d7
b7d4d7
problem: while the rebalance is in progress, if a node is
b7d4d7
rebooted rebalance v status shows the stats of this node as
b7d4d7
0 once the node is back.
b7d4d7
b7d4d7
Reason: when the node is rebooted, once it is back
b7d4d7
glusterd_volume_defrag_restart() starts the rebalance and
b7d4d7
creates the rpc. But due to some race, the rebalance process is
b7d4d7
sending disconnect event, so rpc object is getting destroyed. As
b7d4d7
the rpc object is null, request for fetching the latest stats is
b7d4d7
not sent to rebalance process, and stats are shown as default values
b7d4d7
which is 0.
b7d4d7
b7d4d7
Solution: When the rpc object is null, we should create the rpc if the
b7d4d7
rebalance process is up. so that request can be sent to rebalance
b7d4d7
process using the rpc.
b7d4d7
b7d4d7
>fixes: #1339
b7d4d7
>Change-Id: I1c7533fedd17dcaffc0f7a5a918c87356133a81c
b7d4d7
>Signed-off-by: Sanju Rakonde <srakonde@redhat.com>
b7d4d7
Upstream Patch : https://review.gluster.org/c/glusterfs/+/24641
b7d4d7
b7d4d7
BUG: 1832306
b7d4d7
Change-Id: I1c7533fedd17dcaffc0f7a5a918c87356133a81c
b7d4d7
Signed-off-by: Srijan Sivakumar <ssivakum@redhat.com>
b7d4d7
Reviewed-on: https://code.engineering.redhat.com/gerrit/220369
b7d4d7
Tested-by: RHGS Build Bot <nigelb@redhat.com>
b7d4d7
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
b7d4d7
---
b7d4d7
 xlators/mgmt/glusterd/src/glusterd-syncop.c | 29 ++++++++++++++++++++---------
b7d4d7
 1 file changed, 20 insertions(+), 9 deletions(-)
b7d4d7
b7d4d7
diff --git a/xlators/mgmt/glusterd/src/glusterd-syncop.c b/xlators/mgmt/glusterd/src/glusterd-syncop.c
b7d4d7
index c78983a..df78fef 100644
b7d4d7
--- a/xlators/mgmt/glusterd/src/glusterd-syncop.c
b7d4d7
+++ b/xlators/mgmt/glusterd/src/glusterd-syncop.c
b7d4d7
@@ -1693,6 +1693,7 @@ gd_brick_op_phase(glusterd_op_t op, dict_t *op_ctx, dict_t *req_dict,
b7d4d7
     rpc_clnt_t *rpc = NULL;
b7d4d7
     dict_t *rsp_dict = NULL;
b7d4d7
     int32_t cmd = GF_OP_CMD_NONE;
b7d4d7
+    glusterd_volinfo_t *volinfo = NULL;
b7d4d7
 
b7d4d7
     this = THIS;
b7d4d7
     rsp_dict = dict_new();
b7d4d7
@@ -1724,18 +1725,28 @@ gd_brick_op_phase(glusterd_op_t op, dict_t *op_ctx, dict_t *req_dict,
b7d4d7
     cds_list_for_each_entry_safe(pending_node, tmp, &selected, list)
b7d4d7
     {
b7d4d7
         rpc = glusterd_pending_node_get_rpc(pending_node);
b7d4d7
+        /* In the case of rebalance if the rpc object is null, we try to
b7d4d7
+         * create the rpc object. if the rebalance daemon is down, it returns
b7d4d7
+         * -1. otherwise, rpc object will be created and referenced.
b7d4d7
+         */
b7d4d7
         if (!rpc) {
b7d4d7
-            if (pending_node->type == GD_NODE_REBALANCE) {
b7d4d7
-                ret = 0;
b7d4d7
-                glusterd_defrag_volume_node_rsp(req_dict, NULL, op_ctx);
b7d4d7
+            if (pending_node->type == GD_NODE_REBALANCE && pending_node->node) {
b7d4d7
+                volinfo = pending_node->node;
b7d4d7
+                ret = glusterd_rebalance_rpc_create(volinfo);
b7d4d7
+                if (ret) {
b7d4d7
+                    ret = 0;
b7d4d7
+                    glusterd_defrag_volume_node_rsp(req_dict, NULL, op_ctx);
b7d4d7
+                    goto out;
b7d4d7
+                } else {
b7d4d7
+                    rpc = glusterd_defrag_rpc_get(volinfo->rebal.defrag);
b7d4d7
+                }
b7d4d7
+            } else {
b7d4d7
+                ret = -1;
b7d4d7
+                gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_RPC_FAILURE,
b7d4d7
+                       "Brick Op failed "
b7d4d7
+                       "due to rpc failure.");
b7d4d7
                 goto out;
b7d4d7
             }
b7d4d7
-
b7d4d7
-            ret = -1;
b7d4d7
-            gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_RPC_FAILURE,
b7d4d7
-                   "Brick Op failed "
b7d4d7
-                   "due to rpc failure.");
b7d4d7
-            goto out;
b7d4d7
         }
b7d4d7
 
b7d4d7
         /* Redirect operation to be detach tier via rebalance flow. */
b7d4d7
-- 
b7d4d7
1.8.3.1
b7d4d7