74096c
From 854ab79dbef449c39adf66e3faebb4681359fce4 Mon Sep 17 00:00:00 2001
74096c
From: mohit84 <moagrawa@redhat.com>
74096c
Date: Thu, 18 Feb 2021 09:40:44 +0530
74096c
Subject: [PATCH 533/538] glusterd: Rebalance cli is not showing correct status
74096c
 after reboot (#2172)
74096c
74096c
Rebalance cli is not showing correct status after reboot.
74096c
74096c
The CLI is not correct status because defrag object is not
74096c
valid at the time of creating a rpc connection to show the status.
74096c
The defrag object is not valid because at the time of start a glusterd
74096c
glusterd_restart_rebalance can be call almost at the same time by two
74096c
different synctask and glusterd got a disconnect on rpc object and it
74096c
cleanup the defrag object.
74096c
74096c
Solution: To avoid the defrag object populate a reference count before
74096c
          create a defrag rpc object.
74096c
>Fixes: #1339
74096c
>Signed-off-by: Mohit Agrawal <moagrawa@redhat.com>
74096c
>Change-Id: Ia284015d79beaa3d703ebabb92f26870a5aaafba
74096c
Upstream Patch : https://github.com/gluster/glusterfs/pull/2172
74096c
74096c
BUG: 1832306
74096c
Change-Id: Ia284015d79beaa3d703ebabb92f26870a5aaafba
74096c
Signed-off-by: srijan-sivakumar <ssivakum@redhat.com>
74096c
Reviewed-on: https://code.engineering.redhat.com/gerrit/228249
74096c
Tested-by: RHGS Build Bot <nigelb@redhat.com>
74096c
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
74096c
---
74096c
 xlators/mgmt/glusterd/src/glusterd-rebalance.c | 35 ++++++++++-----
74096c
 xlators/mgmt/glusterd/src/glusterd-syncop.c    |  1 +
74096c
 xlators/mgmt/glusterd/src/glusterd-utils.c     | 59 +++++++++++++++++++++++++-
74096c
 xlators/mgmt/glusterd/src/glusterd-utils.h     |  5 +++
74096c
 xlators/mgmt/glusterd/src/glusterd.h           |  1 +
74096c
 5 files changed, 90 insertions(+), 11 deletions(-)
74096c
74096c
diff --git a/xlators/mgmt/glusterd/src/glusterd-rebalance.c b/xlators/mgmt/glusterd/src/glusterd-rebalance.c
74096c
index b419a89..fcd5318 100644
74096c
--- a/xlators/mgmt/glusterd/src/glusterd-rebalance.c
74096c
+++ b/xlators/mgmt/glusterd/src/glusterd-rebalance.c
74096c
@@ -86,6 +86,7 @@ __glusterd_defrag_notify(struct rpc_clnt *rpc, void *mydata,
74096c
     glusterd_conf_t *priv = NULL;
74096c
     xlator_t *this = NULL;
74096c
     int pid = -1;
74096c
+    int refcnt = 0;
74096c
 
74096c
     this = THIS;
74096c
     if (!this)
74096c
@@ -125,11 +126,12 @@ __glusterd_defrag_notify(struct rpc_clnt *rpc, void *mydata,
74096c
         }
74096c
 
74096c
         case RPC_CLNT_DISCONNECT: {
74096c
-            if (!defrag->connected)
74096c
-                return 0;
74096c
-
74096c
             LOCK(&defrag->lock);
74096c
             {
74096c
+                if (!defrag->connected) {
74096c
+                    UNLOCK(&defrag->lock);
74096c
+                    return 0;
74096c
+                }
74096c
                 defrag->connected = 0;
74096c
             }
74096c
             UNLOCK(&defrag->lock);
74096c
@@ -146,11 +148,11 @@ __glusterd_defrag_notify(struct rpc_clnt *rpc, void *mydata,
74096c
             glusterd_defrag_rpc_put(defrag);
74096c
             if (defrag->cbk_fn)
74096c
                 defrag->cbk_fn(volinfo, volinfo->rebal.defrag_status);
74096c
-
74096c
-            GF_FREE(defrag);
74096c
+            refcnt = glusterd_defrag_unref(defrag);
74096c
             gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_REBALANCE_DISCONNECTED,
74096c
-                   "Rebalance process for volume %s has disconnected.",
74096c
-                   volinfo->volname);
74096c
+                   "Rebalance process for volume %s has disconnected"
74096c
+                   " and defrag refcnt is %d.",
74096c
+                   volinfo->volname, refcnt);
74096c
             break;
74096c
         }
74096c
         case RPC_CLNT_DESTROY:
74096c
@@ -309,7 +311,11 @@ glusterd_handle_defrag_start(glusterd_volinfo_t *volinfo, char *op_errstr,
74096c
         gf_msg_debug("glusterd", 0, "rebalance command failed");
74096c
         goto out;
74096c
     }
74096c
-
74096c
+    /* Take reference before sleep to save defrag object cleanup while
74096c
+       glusterd_restart_rebalance call for other bricks by syncktask
74096c
+       at the time of restart a glusterd.
74096c
+    */
74096c
+    glusterd_defrag_ref(defrag);
74096c
     sleep(5);
74096c
 
74096c
     ret = glusterd_rebalance_rpc_create(volinfo);
74096c
@@ -372,6 +378,7 @@ glusterd_rebalance_rpc_create(glusterd_volinfo_t *volinfo)
74096c
     GF_ASSERT(this);
74096c
     priv = this->private;
74096c
     GF_ASSERT(priv);
74096c
+    struct rpc_clnt *rpc = NULL;
74096c
 
74096c
     // rebalance process is not started
74096c
     if (!defrag)
74096c
@@ -396,13 +403,21 @@ glusterd_rebalance_rpc_create(glusterd_volinfo_t *volinfo)
74096c
     }
74096c
 
74096c
     glusterd_volinfo_ref(volinfo);
74096c
-    ret = glusterd_rpc_create(&defrag->rpc, options, glusterd_defrag_notify,
74096c
-                              volinfo, _gf_true);
74096c
+    ret = glusterd_rpc_create(&rpc, options, glusterd_defrag_notify, volinfo,
74096c
+                              _gf_false);
74096c
     if (ret) {
74096c
         gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_RPC_CREATE_FAIL,
74096c
                "Glusterd RPC creation failed");
74096c
         goto out;
74096c
     }
74096c
+    LOCK(&defrag->lock);
74096c
+    {
74096c
+        if (!defrag->rpc)
74096c
+            defrag->rpc = rpc;
74096c
+        else
74096c
+            rpc_clnt_unref(rpc);
74096c
+    }
74096c
+    UNLOCK(&defrag->lock);
74096c
     ret = 0;
74096c
 out:
74096c
     if (options)
74096c
diff --git a/xlators/mgmt/glusterd/src/glusterd-syncop.c b/xlators/mgmt/glusterd/src/glusterd-syncop.c
74096c
index df78fef..05c9e11 100644
74096c
--- a/xlators/mgmt/glusterd/src/glusterd-syncop.c
74096c
+++ b/xlators/mgmt/glusterd/src/glusterd-syncop.c
74096c
@@ -1732,6 +1732,7 @@ gd_brick_op_phase(glusterd_op_t op, dict_t *op_ctx, dict_t *req_dict,
74096c
         if (!rpc) {
74096c
             if (pending_node->type == GD_NODE_REBALANCE && pending_node->node) {
74096c
                 volinfo = pending_node->node;
74096c
+                glusterd_defrag_ref(volinfo->rebal.defrag);
74096c
                 ret = glusterd_rebalance_rpc_create(volinfo);
74096c
                 if (ret) {
74096c
                     ret = 0;
74096c
diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c
74096c
index bc188a2..9fb8eab 100644
74096c
--- a/xlators/mgmt/glusterd/src/glusterd-utils.c
74096c
+++ b/xlators/mgmt/glusterd/src/glusterd-utils.c
74096c
@@ -93,6 +93,44 @@
74096c
 #define NLMV4_VERSION 4
74096c
 #define NLMV1_VERSION 1
74096c
 
74096c
+int
74096c
+glusterd_defrag_ref(glusterd_defrag_info_t *defrag)
74096c
+{
74096c
+    int refcnt = 0;
74096c
+
74096c
+    if (!defrag)
74096c
+        goto out;
74096c
+
74096c
+    LOCK(&defrag->lock);
74096c
+    {
74096c
+        refcnt = ++defrag->refcnt;
74096c
+    }
74096c
+    UNLOCK(&defrag->lock);
74096c
+
74096c
+out:
74096c
+    return refcnt;
74096c
+}
74096c
+
74096c
+int
74096c
+glusterd_defrag_unref(glusterd_defrag_info_t *defrag)
74096c
+{
74096c
+    int refcnt = -1;
74096c
+
74096c
+    if (!defrag)
74096c
+        goto out;
74096c
+
74096c
+    LOCK(&defrag->lock);
74096c
+    {
74096c
+        refcnt = --defrag->refcnt;
74096c
+        if (refcnt <= 0)
74096c
+            GF_FREE(defrag);
74096c
+    }
74096c
+    UNLOCK(&defrag->lock);
74096c
+
74096c
+out:
74096c
+    return refcnt;
74096c
+}
74096c
+
74096c
 gf_boolean_t
74096c
 is_brick_mx_enabled(void)
74096c
 {
74096c
@@ -9370,6 +9408,7 @@ glusterd_volume_defrag_restart(glusterd_volinfo_t *volinfo, char *op_errstr,
74096c
     char pidfile[PATH_MAX] = "";
74096c
     int ret = -1;
74096c
     pid_t pid = 0;
74096c
+    int refcnt = 0;
74096c
 
74096c
     this = THIS;
74096c
     GF_ASSERT(this);
74096c
@@ -9410,7 +9449,25 @@ glusterd_volume_defrag_restart(glusterd_volinfo_t *volinfo, char *op_errstr,
74096c
                              volinfo->volname);
74096c
                     goto out;
74096c
                 }
74096c
-                ret = glusterd_rebalance_rpc_create(volinfo);
74096c
+                refcnt = glusterd_defrag_ref(volinfo->rebal.defrag);
74096c
+                /* If refcnt value is 1 it means either defrag object is
74096c
+                   poulated by glusterd_rebalance_defrag_init or previous
74096c
+                   rpc creation was failed.If it is not 1 it means it(defrag)
74096c
+                   was populated at the time of start a rebalance daemon.
74096c
+                   We need to create a rpc object only while a previous
74096c
+                   rpc connection was not established successfully at the
74096c
+                   time of restart a rebalance daemon by
74096c
+                   glusterd_handle_defrag_start otherwise rebalance cli
74096c
+                   does not show correct status after just reboot a node and try
74096c
+                   to print the rebalance status because defrag object has been
74096c
+                   destroyed during handling of rpc disconnect.
74096c
+                */
74096c
+                if (refcnt == 1) {
74096c
+                    ret = glusterd_rebalance_rpc_create(volinfo);
74096c
+                } else {
74096c
+                    ret = 0;
74096c
+                    glusterd_defrag_unref(volinfo->rebal.defrag);
74096c
+                }
74096c
                 break;
74096c
             }
74096c
         case GF_DEFRAG_STATUS_NOT_STARTED:
74096c
diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.h b/xlators/mgmt/glusterd/src/glusterd-utils.h
74096c
index 02d85d2..4541471 100644
74096c
--- a/xlators/mgmt/glusterd/src/glusterd-utils.h
74096c
+++ b/xlators/mgmt/glusterd/src/glusterd-utils.h
74096c
@@ -886,4 +886,9 @@ int32_t
74096c
 glusterd_check_brick_order(dict_t *dict, char *err_str, int32_t type,
74096c
                            int32_t sub_count);
74096c
 
74096c
+int
74096c
+glusterd_defrag_ref(glusterd_defrag_info_t *defrag);
74096c
+
74096c
+int
74096c
+glusterd_defrag_unref(glusterd_defrag_info_t *defrag);
74096c
 #endif
74096c
diff --git a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h
74096c
index efe4d0e..9de3f28 100644
74096c
--- a/xlators/mgmt/glusterd/src/glusterd.h
74096c
+++ b/xlators/mgmt/glusterd/src/glusterd.h
74096c
@@ -321,6 +321,7 @@ struct glusterd_defrag_info_ {
74096c
     uint64_t total_data;
74096c
     uint64_t num_files_lookedup;
74096c
     uint64_t total_failures;
74096c
+    int refcnt;
74096c
     gf_lock_t lock;
74096c
     int cmd;
74096c
     pthread_t th;
74096c
-- 
74096c
1.8.3.1
74096c