14f8ab
From 854ab79dbef449c39adf66e3faebb4681359fce4 Mon Sep 17 00:00:00 2001
14f8ab
From: mohit84 <moagrawa@redhat.com>
14f8ab
Date: Thu, 18 Feb 2021 09:40:44 +0530
14f8ab
Subject: [PATCH 533/538] glusterd: Rebalance cli is not showing correct status
14f8ab
 after reboot (#2172)
14f8ab
14f8ab
Rebalance cli is not showing correct status after reboot.
14f8ab
14f8ab
The CLI does not show the correct status because the defrag object is not
14f8ab
valid at the time of creating a rpc connection to show the status.
14f8ab
The defrag object is not valid because, at the time glusterd starts,
14f8ab
glusterd_restart_rebalance can be called almost at the same time by two
14f8ab
different synctasks; glusterd then gets a disconnect on the rpc object and it
14f8ab
cleans up the defrag object.
14f8ab
14f8ab
Solution: To avoid defrag object cleanup, take a reference count before
14f8ab
          creating the defrag rpc object.
14f8ab
>Fixes: #1339
14f8ab
>Signed-off-by: Mohit Agrawal <moagrawa@redhat.com>
14f8ab
>Change-Id: Ia284015d79beaa3d703ebabb92f26870a5aaafba
14f8ab
Upstream Patch : https://github.com/gluster/glusterfs/pull/2172
14f8ab
14f8ab
BUG: 1832306
14f8ab
Change-Id: Ia284015d79beaa3d703ebabb92f26870a5aaafba
14f8ab
Signed-off-by: srijan-sivakumar <ssivakum@redhat.com>
14f8ab
Reviewed-on: https://code.engineering.redhat.com/gerrit/228249
14f8ab
Tested-by: RHGS Build Bot <nigelb@redhat.com>
14f8ab
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
14f8ab
---
14f8ab
 xlators/mgmt/glusterd/src/glusterd-rebalance.c | 35 ++++++++++-----
14f8ab
 xlators/mgmt/glusterd/src/glusterd-syncop.c    |  1 +
14f8ab
 xlators/mgmt/glusterd/src/glusterd-utils.c     | 59 +++++++++++++++++++++++++-
14f8ab
 xlators/mgmt/glusterd/src/glusterd-utils.h     |  5 +++
14f8ab
 xlators/mgmt/glusterd/src/glusterd.h           |  1 +
14f8ab
 5 files changed, 90 insertions(+), 11 deletions(-)
14f8ab
14f8ab
diff --git a/xlators/mgmt/glusterd/src/glusterd-rebalance.c b/xlators/mgmt/glusterd/src/glusterd-rebalance.c
14f8ab
index b419a89..fcd5318 100644
14f8ab
--- a/xlators/mgmt/glusterd/src/glusterd-rebalance.c
14f8ab
+++ b/xlators/mgmt/glusterd/src/glusterd-rebalance.c
14f8ab
@@ -86,6 +86,7 @@ __glusterd_defrag_notify(struct rpc_clnt *rpc, void *mydata,
14f8ab
     glusterd_conf_t *priv = NULL;
14f8ab
     xlator_t *this = NULL;
14f8ab
     int pid = -1;
14f8ab
+    int refcnt = 0;
14f8ab
 
14f8ab
     this = THIS;
14f8ab
     if (!this)
14f8ab
@@ -125,11 +126,12 @@ __glusterd_defrag_notify(struct rpc_clnt *rpc, void *mydata,
14f8ab
         }
14f8ab
 
14f8ab
         case RPC_CLNT_DISCONNECT: {
14f8ab
-            if (!defrag->connected)
14f8ab
-                return 0;
14f8ab
-
14f8ab
             LOCK(&defrag->lock);
14f8ab
             {
14f8ab
+                if (!defrag->connected) {
14f8ab
+                    UNLOCK(&defrag->lock);
14f8ab
+                    return 0;
14f8ab
+                }
14f8ab
                 defrag->connected = 0;
14f8ab
             }
14f8ab
             UNLOCK(&defrag->lock);
14f8ab
@@ -146,11 +148,11 @@ __glusterd_defrag_notify(struct rpc_clnt *rpc, void *mydata,
14f8ab
             glusterd_defrag_rpc_put(defrag);
14f8ab
             if (defrag->cbk_fn)
14f8ab
                 defrag->cbk_fn(volinfo, volinfo->rebal.defrag_status);
14f8ab
-
14f8ab
-            GF_FREE(defrag);
14f8ab
+            refcnt = glusterd_defrag_unref(defrag);
14f8ab
             gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_REBALANCE_DISCONNECTED,
14f8ab
-                   "Rebalance process for volume %s has disconnected.",
14f8ab
-                   volinfo->volname);
14f8ab
+                   "Rebalance process for volume %s has disconnected"
14f8ab
+                   " and defrag refcnt is %d.",
14f8ab
+                   volinfo->volname, refcnt);
14f8ab
             break;
14f8ab
         }
14f8ab
         case RPC_CLNT_DESTROY:
14f8ab
@@ -309,7 +311,11 @@ glusterd_handle_defrag_start(glusterd_volinfo_t *volinfo, char *op_errstr,
14f8ab
         gf_msg_debug("glusterd", 0, "rebalance command failed");
14f8ab
         goto out;
14f8ab
     }
14f8ab
-
14f8ab
+    /* Take reference before sleep to save defrag object cleanup while
14f8ab
+       glusterd_restart_rebalance call for other bricks by synctask
14f8ab
+       at the time of restart a glusterd.
14f8ab
+    */
14f8ab
+    glusterd_defrag_ref(defrag);
14f8ab
     sleep(5);
14f8ab
 
14f8ab
     ret = glusterd_rebalance_rpc_create(volinfo);
14f8ab
@@ -372,6 +378,7 @@ glusterd_rebalance_rpc_create(glusterd_volinfo_t *volinfo)
14f8ab
     GF_ASSERT(this);
14f8ab
     priv = this->private;
14f8ab
     GF_ASSERT(priv);
14f8ab
+    struct rpc_clnt *rpc = NULL;
14f8ab
 
14f8ab
     // rebalance process is not started
14f8ab
     if (!defrag)
14f8ab
@@ -396,13 +403,21 @@ glusterd_rebalance_rpc_create(glusterd_volinfo_t *volinfo)
14f8ab
     }
14f8ab
 
14f8ab
     glusterd_volinfo_ref(volinfo);
14f8ab
-    ret = glusterd_rpc_create(&defrag->rpc, options, glusterd_defrag_notify,
14f8ab
-                              volinfo, _gf_true);
14f8ab
+    ret = glusterd_rpc_create(&rpc, options, glusterd_defrag_notify, volinfo,
14f8ab
+                              _gf_false);
14f8ab
     if (ret) {
14f8ab
         gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_RPC_CREATE_FAIL,
14f8ab
                "Glusterd RPC creation failed");
14f8ab
         goto out;
14f8ab
     }
14f8ab
+    LOCK(&defrag->lock);
14f8ab
+    {
14f8ab
+        if (!defrag->rpc)
14f8ab
+            defrag->rpc = rpc;
14f8ab
+        else
14f8ab
+            rpc_clnt_unref(rpc);
14f8ab
+    }
14f8ab
+    UNLOCK(&defrag->lock);
14f8ab
     ret = 0;
14f8ab
 out:
14f8ab
     if (options)
14f8ab
diff --git a/xlators/mgmt/glusterd/src/glusterd-syncop.c b/xlators/mgmt/glusterd/src/glusterd-syncop.c
14f8ab
index df78fef..05c9e11 100644
14f8ab
--- a/xlators/mgmt/glusterd/src/glusterd-syncop.c
14f8ab
+++ b/xlators/mgmt/glusterd/src/glusterd-syncop.c
14f8ab
@@ -1732,6 +1732,7 @@ gd_brick_op_phase(glusterd_op_t op, dict_t *op_ctx, dict_t *req_dict,
14f8ab
         if (!rpc) {
14f8ab
             if (pending_node->type == GD_NODE_REBALANCE && pending_node->node) {
14f8ab
                 volinfo = pending_node->node;
14f8ab
+                glusterd_defrag_ref(volinfo->rebal.defrag);
14f8ab
                 ret = glusterd_rebalance_rpc_create(volinfo);
14f8ab
                 if (ret) {
14f8ab
                     ret = 0;
14f8ab
diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c
14f8ab
index bc188a2..9fb8eab 100644
14f8ab
--- a/xlators/mgmt/glusterd/src/glusterd-utils.c
14f8ab
+++ b/xlators/mgmt/glusterd/src/glusterd-utils.c
14f8ab
@@ -93,6 +93,44 @@
14f8ab
 #define NLMV4_VERSION 4
14f8ab
 #define NLMV1_VERSION 1
14f8ab
 
14f8ab
+/* Take one reference on a defrag object under its own lock.
14f8ab
+ * Returns the updated count, or 0 when defrag is NULL.
14f8ab
+ */
14f8ab
+int
14f8ab
+glusterd_defrag_ref(glusterd_defrag_info_t *defrag)
14f8ab
+{
14f8ab
+    int count = 0;
14f8ab
+
14f8ab
+    if (defrag) {
14f8ab
+        LOCK(&defrag->lock);
14f8ab
+        defrag->refcnt++;
14f8ab
+        count = defrag->refcnt;
14f8ab
+        UNLOCK(&defrag->lock);
14f8ab
+    }
14f8ab
+
14f8ab
+    return count;
14f8ab
+}
14f8ab
+
14f8ab
+/* Drop one reference; free defrag once the count is <= 0.
14f8ab
+ * Returns the new count, or -1 if defrag is NULL. */
14f8ab
+int
14f8ab
+glusterd_defrag_unref(glusterd_defrag_info_t *defrag)
14f8ab
+{
14f8ab
+    int refcnt = -1;
14f8ab
+
14f8ab
+    if (!defrag)
14f8ab
+        goto out;
14f8ab
+
14f8ab
+    LOCK(&defrag->lock);
14f8ab
+    refcnt = --defrag->refcnt;
14f8ab
+    UNLOCK(&defrag->lock);
14f8ab
+    /* Free only after unlocking: the lock is a member of defrag itself. */
14f8ab
+    if (refcnt <= 0)
14f8ab
+        GF_FREE(defrag);
14f8ab
+out:
14f8ab
+    return refcnt;
14f8ab
+}
14f8ab
+
14f8ab
 gf_boolean_t
14f8ab
 is_brick_mx_enabled(void)
14f8ab
 {
14f8ab
@@ -9370,6 +9408,7 @@ glusterd_volume_defrag_restart(glusterd_volinfo_t *volinfo, char *op_errstr,
14f8ab
     char pidfile[PATH_MAX] = "";
14f8ab
     int ret = -1;
14f8ab
     pid_t pid = 0;
14f8ab
+    int refcnt = 0;
14f8ab
 
14f8ab
     this = THIS;
14f8ab
     GF_ASSERT(this);
14f8ab
@@ -9410,7 +9449,25 @@ glusterd_volume_defrag_restart(glusterd_volinfo_t *volinfo, char *op_errstr,
14f8ab
                              volinfo->volname);
14f8ab
                     goto out;
14f8ab
                 }
14f8ab
-                ret = glusterd_rebalance_rpc_create(volinfo);
14f8ab
+                refcnt = glusterd_defrag_ref(volinfo->rebal.defrag);
14f8ab
+                /* If refcnt value is 1 it means either defrag object is
14f8ab
+                   populated by glusterd_rebalance_defrag_init or previous
14f8ab
+                   rpc creation failed. If it is not 1 it means it (defrag)
14f8ab
+                   was populated at the time of start a rebalance daemon.
14f8ab
+                   We need to create a rpc object only while a previous
14f8ab
+                   rpc connection was not established successfully at the
14f8ab
+                   time of restart a rebalance daemon by
14f8ab
+                   glusterd_handle_defrag_start otherwise rebalance cli
14f8ab
+                   does not show correct status after just reboot a node and try
14f8ab
+                   to print the rebalance status because defrag object has been
14f8ab
+                   destroyed during handling of rpc disconnect.
14f8ab
+                */
14f8ab
+                if (refcnt == 1) {
14f8ab
+                    ret = glusterd_rebalance_rpc_create(volinfo);
14f8ab
+                } else {
14f8ab
+                    ret = 0;
14f8ab
+                    glusterd_defrag_unref(volinfo->rebal.defrag);
14f8ab
+                }
14f8ab
                 break;
14f8ab
             }
14f8ab
         case GF_DEFRAG_STATUS_NOT_STARTED:
14f8ab
diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.h b/xlators/mgmt/glusterd/src/glusterd-utils.h
14f8ab
index 02d85d2..4541471 100644
14f8ab
--- a/xlators/mgmt/glusterd/src/glusterd-utils.h
14f8ab
+++ b/xlators/mgmt/glusterd/src/glusterd-utils.h
14f8ab
@@ -886,4 +886,9 @@ int32_t
14f8ab
 glusterd_check_brick_order(dict_t *dict, char *err_str, int32_t type,
14f8ab
                            int32_t sub_count);
14f8ab
 
14f8ab
+int
14f8ab
+glusterd_defrag_ref(glusterd_defrag_info_t *defrag);
14f8ab
+
14f8ab
+int
14f8ab
+glusterd_defrag_unref(glusterd_defrag_info_t *defrag);
14f8ab
 #endif
14f8ab
diff --git a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h
14f8ab
index efe4d0e..9de3f28 100644
14f8ab
--- a/xlators/mgmt/glusterd/src/glusterd.h
14f8ab
+++ b/xlators/mgmt/glusterd/src/glusterd.h
14f8ab
@@ -321,6 +321,7 @@ struct glusterd_defrag_info_ {
14f8ab
     uint64_t total_data;
14f8ab
     uint64_t num_files_lookedup;
14f8ab
     uint64_t total_failures;
14f8ab
+    int refcnt;
14f8ab
     gf_lock_t lock;
14f8ab
     int cmd;
14f8ab
     pthread_t th;
14f8ab
-- 
14f8ab
1.8.3.1
14f8ab