d2787b
From 6c3b21ce5bb76b35856a6c270eb65d11f869061f Mon Sep 17 00:00:00 2001
d2787b
From: Sanju Rakonde <srakonde@redhat.com>
d2787b
Date: Fri, 26 Jun 2020 12:10:31 +0530
d2787b
Subject: [PATCH 484/511] glusterd: rebalance status displays stats as 0 after
d2787b
 reboot
d2787b
d2787b
Problem: while the rebalance is in progress, if a node is
d2787b
rebooted, rebalance v status shows the stats of this node as
d2787b
0 once the node is back.
d2787b
d2787b
Reason: when the node is rebooted, once it is back
d2787b
glusterd_volume_defrag_restart() starts the rebalance and
d2787b
creates the rpc. But due to some race, the rebalance process is
d2787b
sending a disconnect event, so the rpc object is getting destroyed. As
d2787b
the rpc object is null, request for fetching the latest stats is
d2787b
not sent to rebalance process, and stats are shown as default values
d2787b
which is 0.
d2787b
d2787b
Solution: When the rpc object is null, we should create the rpc if the
d2787b
rebalance process is up. so that request can be sent to rebalance
d2787b
process using the rpc.
d2787b
d2787b
>fixes: #1339
d2787b
>Change-Id: I1c7533fedd17dcaffc0f7a5a918c87356133a81c
d2787b
>Signed-off-by: Sanju Rakonde <srakonde@redhat.com>
d2787b
Upstream Patch : https://review.gluster.org/c/glusterfs/+/24641
d2787b
d2787b
BUG: 1832306
d2787b
Change-Id: I1c7533fedd17dcaffc0f7a5a918c87356133a81c
d2787b
Signed-off-by: Srijan Sivakumar <ssivakum@redhat.com>
d2787b
Reviewed-on: https://code.engineering.redhat.com/gerrit/220369
d2787b
Tested-by: RHGS Build Bot <nigelb@redhat.com>
d2787b
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
d2787b
---
d2787b
 xlators/mgmt/glusterd/src/glusterd-syncop.c | 29 ++++++++++++++++++++---------
d2787b
 1 file changed, 20 insertions(+), 9 deletions(-)
d2787b
d2787b
diff --git a/xlators/mgmt/glusterd/src/glusterd-syncop.c b/xlators/mgmt/glusterd/src/glusterd-syncop.c
d2787b
index c78983a..df78fef 100644
d2787b
--- a/xlators/mgmt/glusterd/src/glusterd-syncop.c
d2787b
+++ b/xlators/mgmt/glusterd/src/glusterd-syncop.c
d2787b
@@ -1693,6 +1693,7 @@ gd_brick_op_phase(glusterd_op_t op, dict_t *op_ctx, dict_t *req_dict,
d2787b
     rpc_clnt_t *rpc = NULL;
d2787b
     dict_t *rsp_dict = NULL;
d2787b
     int32_t cmd = GF_OP_CMD_NONE;
d2787b
+    glusterd_volinfo_t *volinfo = NULL;
d2787b
 
d2787b
     this = THIS;
d2787b
     rsp_dict = dict_new();
d2787b
@@ -1724,18 +1725,28 @@ gd_brick_op_phase(glusterd_op_t op, dict_t *op_ctx, dict_t *req_dict,
d2787b
     cds_list_for_each_entry_safe(pending_node, tmp, &selected, list)
d2787b
     {
d2787b
         rpc = glusterd_pending_node_get_rpc(pending_node);
d2787b
+        /* In the case of rebalance if the rpc object is null, we try to
d2787b
+         * create the rpc object. if the rebalance daemon is down, it returns
d2787b
+         * -1. otherwise, rpc object will be created and referenced.
d2787b
+         */
d2787b
         if (!rpc) {
d2787b
-            if (pending_node->type == GD_NODE_REBALANCE) {
d2787b
-                ret = 0;
d2787b
-                glusterd_defrag_volume_node_rsp(req_dict, NULL, op_ctx);
d2787b
+            if (pending_node->type == GD_NODE_REBALANCE && pending_node->node) {
d2787b
+                volinfo = pending_node->node;
d2787b
+                ret = glusterd_rebalance_rpc_create(volinfo);
d2787b
+                if (ret) {
d2787b
+                    ret = 0;
d2787b
+                    glusterd_defrag_volume_node_rsp(req_dict, NULL, op_ctx);
d2787b
+                    goto out;
d2787b
+                } else {
d2787b
+                    rpc = glusterd_defrag_rpc_get(volinfo->rebal.defrag);
d2787b
+                }
d2787b
+            } else {
d2787b
+                ret = -1;
d2787b
+                gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_RPC_FAILURE,
d2787b
+                       "Brick Op failed "
d2787b
+                       "due to rpc failure.");
d2787b
                 goto out;
d2787b
             }
d2787b
-
d2787b
-            ret = -1;
d2787b
-            gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_RPC_FAILURE,
d2787b
-                   "Brick Op failed "
d2787b
-                   "due to rpc failure.");
d2787b
-            goto out;
d2787b
         }
d2787b
 
d2787b
         /* Redirect operation to be detach tier via rebalance flow. */
d2787b
-- 
d2787b
1.8.3.1
d2787b