14f8ab
From 6c3b21ce5bb76b35856a6c270eb65d11f869061f Mon Sep 17 00:00:00 2001
14f8ab
From: Sanju Rakonde <srakonde@redhat.com>
14f8ab
Date: Fri, 26 Jun 2020 12:10:31 +0530
14f8ab
Subject: [PATCH 484/511] glusterd: rebalance status displays stats as 0 after
14f8ab
 reboot
14f8ab
14f8ab
problem: while the rebalance is in progress, if a node is
14f8ab
rebooted rebalance v status shows the stats of this node as
14f8ab
0 once the node is back.
14f8ab
14f8ab
Reason: when the node is rebooted, once it is back
14f8ab
glusterd_volume_defrag_restart() starts the rebalance and
14f8ab
creates the rpc. But due to some race, the rebalance process is
14f8ab
sending disconnect event, so rpc object is getting destroyed. As
14f8ab
the rpc object is null, request for fetching the latest stats is
14f8ab
not sent to rebalance process, and stats are shown as default values
14f8ab
which is 0.
14f8ab
14f8ab
Solution: When the rpc object is null, we should create the rpc if the
14f8ab
rebalance process is up. so that request can be sent to rebalance
14f8ab
process using the rpc.
14f8ab
14f8ab
>fixes: #1339
14f8ab
>Change-Id: I1c7533fedd17dcaffc0f7a5a918c87356133a81c
14f8ab
>Signed-off-by: Sanju Rakonde <srakonde@redhat.com>
14f8ab
Upstream Patch : https://review.gluster.org/c/glusterfs/+/24641
14f8ab
14f8ab
BUG: 1832306
14f8ab
Change-Id: I1c7533fedd17dcaffc0f7a5a918c87356133a81c
14f8ab
Signed-off-by: Srijan Sivakumar <ssivakum@redhat.com>
14f8ab
Reviewed-on: https://code.engineering.redhat.com/gerrit/220369
14f8ab
Tested-by: RHGS Build Bot <nigelb@redhat.com>
14f8ab
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
14f8ab
---
14f8ab
 xlators/mgmt/glusterd/src/glusterd-syncop.c | 29 ++++++++++++++++++++---------
14f8ab
 1 file changed, 20 insertions(+), 9 deletions(-)
14f8ab
14f8ab
diff --git a/xlators/mgmt/glusterd/src/glusterd-syncop.c b/xlators/mgmt/glusterd/src/glusterd-syncop.c
14f8ab
index c78983a..df78fef 100644
14f8ab
--- a/xlators/mgmt/glusterd/src/glusterd-syncop.c
14f8ab
+++ b/xlators/mgmt/glusterd/src/glusterd-syncop.c
14f8ab
@@ -1693,6 +1693,7 @@ gd_brick_op_phase(glusterd_op_t op, dict_t *op_ctx, dict_t *req_dict,
14f8ab
     rpc_clnt_t *rpc = NULL;
14f8ab
     dict_t *rsp_dict = NULL;
14f8ab
     int32_t cmd = GF_OP_CMD_NONE;
14f8ab
+    glusterd_volinfo_t *volinfo = NULL;
14f8ab
 
14f8ab
     this = THIS;
14f8ab
     rsp_dict = dict_new();
14f8ab
@@ -1724,18 +1725,28 @@ gd_brick_op_phase(glusterd_op_t op, dict_t *op_ctx, dict_t *req_dict,
14f8ab
     cds_list_for_each_entry_safe(pending_node, tmp, &selected, list)
14f8ab
     {
14f8ab
         rpc = glusterd_pending_node_get_rpc(pending_node);
14f8ab
+        /* In the case of rebalance if the rpc object is null, we try to
14f8ab
+         * create the rpc object. if the rebalance daemon is down, it returns
14f8ab
+         * -1. otherwise, rpc object will be created and referenced.
14f8ab
+         */
14f8ab
         if (!rpc) {
14f8ab
-            if (pending_node->type == GD_NODE_REBALANCE) {
14f8ab
-                ret = 0;
14f8ab
-                glusterd_defrag_volume_node_rsp(req_dict, NULL, op_ctx);
14f8ab
+            if (pending_node->type == GD_NODE_REBALANCE && pending_node->node) {
14f8ab
+                volinfo = pending_node->node;
14f8ab
+                ret = glusterd_rebalance_rpc_create(volinfo);
14f8ab
+                if (ret) {
14f8ab
+                    ret = 0;
14f8ab
+                    glusterd_defrag_volume_node_rsp(req_dict, NULL, op_ctx);
14f8ab
+                    goto out;
14f8ab
+                } else {
14f8ab
+                    rpc = glusterd_defrag_rpc_get(volinfo->rebal.defrag);
14f8ab
+                }
14f8ab
+            } else {
14f8ab
+                ret = -1;
14f8ab
+                gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_RPC_FAILURE,
14f8ab
+                       "Brick Op failed "
14f8ab
+                       "due to rpc failure.");
14f8ab
                 goto out;
14f8ab
             }
14f8ab
-
14f8ab
-            ret = -1;
14f8ab
-            gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_RPC_FAILURE,
14f8ab
-                   "Brick Op failed "
14f8ab
-                   "due to rpc failure.");
14f8ab
-            goto out;
14f8ab
         }
14f8ab
 
14f8ab
         /* Redirect operation to be detach tier via rebalance flow. */
14f8ab
-- 
14f8ab
1.8.3.1
14f8ab