17b94a
From 6c3b21ce5bb76b35856a6c270eb65d11f869061f Mon Sep 17 00:00:00 2001
17b94a
From: Sanju Rakonde <srakonde@redhat.com>
17b94a
Date: Fri, 26 Jun 2020 12:10:31 +0530
17b94a
Subject: [PATCH 484/511] glusterd: rebalance status displays stats as 0 after
17b94a
 reboot
17b94a
17b94a
Problem: while the rebalance is in progress, if a node is
17b94a
rebooted rebalance v status shows the stats of this node as
17b94a
0 once the node is back.
17b94a
17b94a
Reason: when the node is rebooted, once it is back
17b94a
glusterd_volume_defrag_restart() starts the rebalance and
17b94a
creates the rpc. But due to some race, the rebalance process is
17b94a
sending a disconnect event, so the rpc object is getting destroyed. As
17b94a
the rpc object is null, request for fetching the latest stats is
17b94a
not sent to the rebalance process, and stats are shown as default values
17b94a
which is 0.
17b94a
17b94a
Solution: When the rpc object is null, we should create the rpc if the
17b94a
rebalance process is up, so that the request can be sent to the rebalance
17b94a
process using the rpc.
17b94a
17b94a
>fixes: #1339
17b94a
>Change-Id: I1c7533fedd17dcaffc0f7a5a918c87356133a81c
17b94a
>Signed-off-by: Sanju Rakonde <srakonde@redhat.com>
17b94a
Upstream Patch : https://review.gluster.org/c/glusterfs/+/24641
17b94a
17b94a
BUG: 1832306
17b94a
Change-Id: I1c7533fedd17dcaffc0f7a5a918c87356133a81c
17b94a
Signed-off-by: Srijan Sivakumar <ssivakum@redhat.com>
17b94a
Reviewed-on: https://code.engineering.redhat.com/gerrit/220369
17b94a
Tested-by: RHGS Build Bot <nigelb@redhat.com>
17b94a
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
17b94a
---
17b94a
 xlators/mgmt/glusterd/src/glusterd-syncop.c | 29 ++++++++++++++++++++---------
17b94a
 1 file changed, 20 insertions(+), 9 deletions(-)
17b94a
17b94a
diff --git a/xlators/mgmt/glusterd/src/glusterd-syncop.c b/xlators/mgmt/glusterd/src/glusterd-syncop.c
17b94a
index c78983a..df78fef 100644
17b94a
--- a/xlators/mgmt/glusterd/src/glusterd-syncop.c
17b94a
+++ b/xlators/mgmt/glusterd/src/glusterd-syncop.c
17b94a
@@ -1693,6 +1693,7 @@ gd_brick_op_phase(glusterd_op_t op, dict_t *op_ctx, dict_t *req_dict,
17b94a
     rpc_clnt_t *rpc = NULL;
17b94a
     dict_t *rsp_dict = NULL;
17b94a
     int32_t cmd = GF_OP_CMD_NONE;
17b94a
+    glusterd_volinfo_t *volinfo = NULL;
17b94a
 
17b94a
     this = THIS;
17b94a
     rsp_dict = dict_new();
17b94a
@@ -1724,18 +1725,28 @@ gd_brick_op_phase(glusterd_op_t op, dict_t *op_ctx, dict_t *req_dict,
17b94a
     cds_list_for_each_entry_safe(pending_node, tmp, &selected, list)
17b94a
     {
17b94a
         rpc = glusterd_pending_node_get_rpc(pending_node);
17b94a
+        /* In the case of rebalance if the rpc object is null, we try to
17b94a
+         * create the rpc object. if the rebalance daemon is down, it returns
17b94a
+         * -1. otherwise, rpc object will be created and referenced.
17b94a
+         */
17b94a
         if (!rpc) {
17b94a
-            if (pending_node->type == GD_NODE_REBALANCE) {
17b94a
-                ret = 0;
17b94a
-                glusterd_defrag_volume_node_rsp(req_dict, NULL, op_ctx);
17b94a
+            if (pending_node->type == GD_NODE_REBALANCE && pending_node->node) {
17b94a
+                volinfo = pending_node->node;
17b94a
+                ret = glusterd_rebalance_rpc_create(volinfo);
17b94a
+                if (ret) {
17b94a
+                    ret = 0;
17b94a
+                    glusterd_defrag_volume_node_rsp(req_dict, NULL, op_ctx);
17b94a
+                    goto out;
17b94a
+                } else {
17b94a
+                    rpc = glusterd_defrag_rpc_get(volinfo->rebal.defrag);
17b94a
+                }
17b94a
+            } else {
17b94a
+                ret = -1;
17b94a
+                gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_RPC_FAILURE,
17b94a
+                       "Brick Op failed "
17b94a
+                       "due to rpc failure.");
17b94a
                 goto out;
17b94a
             }
17b94a
-
17b94a
-            ret = -1;
17b94a
-            gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_RPC_FAILURE,
17b94a
-                   "Brick Op failed "
17b94a
-                   "due to rpc failure.");
17b94a
-            goto out;
17b94a
         }
17b94a
 
17b94a
         /* Redirect operation to be detach tier via rebalance flow. */
17b94a
-- 
17b94a
1.8.3.1
17b94a