17b94a
From 854ab79dbef449c39adf66e3faebb4681359fce4 Mon Sep 17 00:00:00 2001
17b94a
From: mohit84 <moagrawa@redhat.com>
17b94a
Date: Thu, 18 Feb 2021 09:40:44 +0530
17b94a
Subject: [PATCH 533/538] glusterd: Rebalance cli is not showing correct status
17b94a
 after reboot (#2172)
17b94a
17b94a
Rebalance cli is not showing correct status after reboot.
17b94a
17b94a
The CLI does not show the correct status because the defrag object is not
17b94a
valid at the time an rpc connection is created to show the status.
17b94a
The defrag object is not valid because, when glusterd starts,
17b94a
glusterd_restart_rebalance can be called almost at the same time by two
17b94a
different synctasks; glusterd then gets a disconnect on the rpc object and
17b94a
cleans up the defrag object.
17b94a
17b94a
Solution: To avoid premature cleanup of the defrag object, take a reference
17b94a
          on it before creating the defrag rpc object.
17b94a
>Fixes: #1339
17b94a
>Signed-off-by: Mohit Agrawal <moagrawa@redhat.com>
17b94a
>Change-Id: Ia284015d79beaa3d703ebabb92f26870a5aaafba
17b94a
Upstream Patch : https://github.com/gluster/glusterfs/pull/2172
17b94a
17b94a
BUG: 1832306
17b94a
Change-Id: Ia284015d79beaa3d703ebabb92f26870a5aaafba
17b94a
Signed-off-by: srijan-sivakumar <ssivakum@redhat.com>
17b94a
Reviewed-on: https://code.engineering.redhat.com/gerrit/228249
17b94a
Tested-by: RHGS Build Bot <nigelb@redhat.com>
17b94a
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
17b94a
---
17b94a
 xlators/mgmt/glusterd/src/glusterd-rebalance.c | 35 ++++++++++-----
17b94a
 xlators/mgmt/glusterd/src/glusterd-syncop.c    |  1 +
17b94a
 xlators/mgmt/glusterd/src/glusterd-utils.c     | 59 +++++++++++++++++++++++++-
17b94a
 xlators/mgmt/glusterd/src/glusterd-utils.h     |  5 +++
17b94a
 xlators/mgmt/glusterd/src/glusterd.h           |  1 +
17b94a
 5 files changed, 90 insertions(+), 11 deletions(-)
17b94a
17b94a
diff --git a/xlators/mgmt/glusterd/src/glusterd-rebalance.c b/xlators/mgmt/glusterd/src/glusterd-rebalance.c
17b94a
index b419a89..fcd5318 100644
17b94a
--- a/xlators/mgmt/glusterd/src/glusterd-rebalance.c
17b94a
+++ b/xlators/mgmt/glusterd/src/glusterd-rebalance.c
17b94a
@@ -86,6 +86,7 @@ __glusterd_defrag_notify(struct rpc_clnt *rpc, void *mydata,
17b94a
     glusterd_conf_t *priv = NULL;
17b94a
     xlator_t *this = NULL;
17b94a
     int pid = -1;
17b94a
+    int refcnt = 0;
17b94a
 
17b94a
     this = THIS;
17b94a
     if (!this)
17b94a
@@ -125,11 +126,12 @@ __glusterd_defrag_notify(struct rpc_clnt *rpc, void *mydata,
17b94a
         }
17b94a
 
17b94a
         case RPC_CLNT_DISCONNECT: {
17b94a
-            if (!defrag->connected)
17b94a
-                return 0;
17b94a
-
17b94a
             LOCK(&defrag->lock);
17b94a
             {
17b94a
+                if (!defrag->connected) {
17b94a
+                    UNLOCK(&defrag->lock);
17b94a
+                    return 0;
17b94a
+                }
17b94a
                 defrag->connected = 0;
17b94a
             }
17b94a
             UNLOCK(&defrag->lock);
17b94a
@@ -146,11 +148,11 @@ __glusterd_defrag_notify(struct rpc_clnt *rpc, void *mydata,
17b94a
             glusterd_defrag_rpc_put(defrag);
17b94a
             if (defrag->cbk_fn)
17b94a
                 defrag->cbk_fn(volinfo, volinfo->rebal.defrag_status);
17b94a
-
17b94a
-            GF_FREE(defrag);
17b94a
+            refcnt = glusterd_defrag_unref(defrag);
17b94a
             gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_REBALANCE_DISCONNECTED,
17b94a
-                   "Rebalance process for volume %s has disconnected.",
17b94a
-                   volinfo->volname);
17b94a
+                   "Rebalance process for volume %s has disconnected"
17b94a
+                   " and defrag refcnt is %d.",
17b94a
+                   volinfo->volname, refcnt);
17b94a
             break;
17b94a
         }
17b94a
         case RPC_CLNT_DESTROY:
17b94a
@@ -309,7 +311,11 @@ glusterd_handle_defrag_start(glusterd_volinfo_t *volinfo, char *op_errstr,
17b94a
         gf_msg_debug("glusterd", 0, "rebalance command failed");
17b94a
         goto out;
17b94a
     }
17b94a
-
17b94a
+    /* Take reference before sleep to save defrag object cleanup while
17b94a
+       glusterd_restart_rebalance call for other bricks by syncktask
17b94a
+       at the time of restart a glusterd.
17b94a
+    */
17b94a
+    glusterd_defrag_ref(defrag);
17b94a
     sleep(5);
17b94a
 
17b94a
     ret = glusterd_rebalance_rpc_create(volinfo);
17b94a
@@ -372,6 +378,7 @@ glusterd_rebalance_rpc_create(glusterd_volinfo_t *volinfo)
17b94a
     GF_ASSERT(this);
17b94a
     priv = this->private;
17b94a
     GF_ASSERT(priv);
17b94a
+    struct rpc_clnt *rpc = NULL;
17b94a
 
17b94a
     // rebalance process is not started
17b94a
     if (!defrag)
17b94a
@@ -396,13 +403,21 @@ glusterd_rebalance_rpc_create(glusterd_volinfo_t *volinfo)
17b94a
     }
17b94a
 
17b94a
     glusterd_volinfo_ref(volinfo);
17b94a
-    ret = glusterd_rpc_create(&defrag->rpc, options, glusterd_defrag_notify,
17b94a
-                              volinfo, _gf_true);
17b94a
+    ret = glusterd_rpc_create(&rpc, options, glusterd_defrag_notify, volinfo,
17b94a
+                              _gf_false);
17b94a
     if (ret) {
17b94a
         gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_RPC_CREATE_FAIL,
17b94a
                "Glusterd RPC creation failed");
17b94a
         goto out;
17b94a
     }
17b94a
+    LOCK(&defrag->lock);
17b94a
+    {
17b94a
+        if (!defrag->rpc)
17b94a
+            defrag->rpc = rpc;
17b94a
+        else
17b94a
+            rpc_clnt_unref(rpc);
17b94a
+    }
17b94a
+    UNLOCK(&defrag->lock);
17b94a
     ret = 0;
17b94a
 out:
17b94a
     if (options)
17b94a
diff --git a/xlators/mgmt/glusterd/src/glusterd-syncop.c b/xlators/mgmt/glusterd/src/glusterd-syncop.c
17b94a
index df78fef..05c9e11 100644
17b94a
--- a/xlators/mgmt/glusterd/src/glusterd-syncop.c
17b94a
+++ b/xlators/mgmt/glusterd/src/glusterd-syncop.c
17b94a
@@ -1732,6 +1732,7 @@ gd_brick_op_phase(glusterd_op_t op, dict_t *op_ctx, dict_t *req_dict,
17b94a
         if (!rpc) {
17b94a
             if (pending_node->type == GD_NODE_REBALANCE && pending_node->node) {
17b94a
                 volinfo = pending_node->node;
17b94a
+                glusterd_defrag_ref(volinfo->rebal.defrag);
17b94a
                 ret = glusterd_rebalance_rpc_create(volinfo);
17b94a
                 if (ret) {
17b94a
                     ret = 0;
17b94a
diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c
17b94a
index bc188a2..9fb8eab 100644
17b94a
--- a/xlators/mgmt/glusterd/src/glusterd-utils.c
17b94a
+++ b/xlators/mgmt/glusterd/src/glusterd-utils.c
17b94a
@@ -93,6 +93,44 @@
17b94a
 #define NLMV4_VERSION 4
17b94a
 #define NLMV1_VERSION 1
17b94a
 
17b94a
+int
17b94a
+glusterd_defrag_ref(glusterd_defrag_info_t *defrag)
17b94a
+{
17b94a
+    int refcnt = 0; /* stays 0 when defrag is NULL */
17b94a
+
17b94a
+    if (!defrag)
17b94a
+        goto out;
17b94a
+
17b94a
+    LOCK(&defrag->lock);
17b94a
+    {
17b94a
+        refcnt = ++defrag->refcnt; /* increment under defrag->lock */
17b94a
+    }
17b94a
+    UNLOCK(&defrag->lock);
17b94a
+
17b94a
+out:
17b94a
+    return refcnt; /* count after the increment, or 0 for NULL */
17b94a
+}
17b94a
+
17b94a
+int
17b94a
+glusterd_defrag_unref(glusterd_defrag_info_t *defrag)
17b94a
+{
17b94a
+    int refcnt = -1; /* returned as -1 when defrag is NULL */
17b94a
+
17b94a
+    if (!defrag)
17b94a
+        goto out;
17b94a
+
17b94a
+    LOCK(&defrag->lock);
17b94a
+    {
17b94a
+        refcnt = --defrag->refcnt; /* decrement under defrag->lock */
17b94a
+    }
17b94a
+    UNLOCK(&defrag->lock);
17b94a
+    if (refcnt <= 0) /* free after UNLOCK: never unlock freed memory */
17b94a
+        GF_FREE(defrag);
17b94a
+
17b94a
+out:
17b94a
+    return refcnt; /* count after the decrement; <= 0 means it was freed */
17b94a
+}
17b94a
+
17b94a
 gf_boolean_t
17b94a
 is_brick_mx_enabled(void)
17b94a
 {
17b94a
@@ -9370,6 +9408,7 @@ glusterd_volume_defrag_restart(glusterd_volinfo_t *volinfo, char *op_errstr,
17b94a
     char pidfile[PATH_MAX] = "";
17b94a
     int ret = -1;
17b94a
     pid_t pid = 0;
17b94a
+    int refcnt = 0;
17b94a
 
17b94a
     this = THIS;
17b94a
     GF_ASSERT(this);
17b94a
@@ -9410,7 +9449,25 @@ glusterd_volume_defrag_restart(glusterd_volinfo_t *volinfo, char *op_errstr,
17b94a
                              volinfo->volname);
17b94a
                     goto out;
17b94a
                 }
17b94a
-                ret = glusterd_rebalance_rpc_create(volinfo);
17b94a
+                refcnt = glusterd_defrag_ref(volinfo->rebal.defrag);
17b94a
+                /* If refcnt value is 1 it means either defrag object is
17b94a
+                   poulated by glusterd_rebalance_defrag_init or previous
17b94a
+                   rpc creation was failed.If it is not 1 it means it(defrag)
17b94a
+                   was populated at the time of start a rebalance daemon.
17b94a
+                   We need to create a rpc object only while a previous
17b94a
+                   rpc connection was not established successfully at the
17b94a
+                   time of restart a rebalance daemon by
17b94a
+                   glusterd_handle_defrag_start otherwise rebalance cli
17b94a
+                   does not show correct status after just reboot a node and try
17b94a
+                   to print the rebalance status because defrag object has been
17b94a
+                   destroyed during handling of rpc disconnect.
17b94a
+                */
17b94a
+                if (refcnt == 1) {
17b94a
+                    ret = glusterd_rebalance_rpc_create(volinfo);
17b94a
+                } else {
17b94a
+                    ret = 0;
17b94a
+                    glusterd_defrag_unref(volinfo->rebal.defrag);
17b94a
+                }
17b94a
                 break;
17b94a
             }
17b94a
         case GF_DEFRAG_STATUS_NOT_STARTED:
17b94a
diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.h b/xlators/mgmt/glusterd/src/glusterd-utils.h
17b94a
index 02d85d2..4541471 100644
17b94a
--- a/xlators/mgmt/glusterd/src/glusterd-utils.h
17b94a
+++ b/xlators/mgmt/glusterd/src/glusterd-utils.h
17b94a
@@ -886,4 +886,9 @@ int32_t
17b94a
 glusterd_check_brick_order(dict_t *dict, char *err_str, int32_t type,
17b94a
                            int32_t sub_count);
17b94a
 
17b94a
+int
17b94a
+glusterd_defrag_ref(glusterd_defrag_info_t *defrag);
17b94a
+
17b94a
+int
17b94a
+glusterd_defrag_unref(glusterd_defrag_info_t *defrag);
17b94a
 #endif
17b94a
diff --git a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h
17b94a
index efe4d0e..9de3f28 100644
17b94a
--- a/xlators/mgmt/glusterd/src/glusterd.h
17b94a
+++ b/xlators/mgmt/glusterd/src/glusterd.h
17b94a
@@ -321,6 +321,7 @@ struct glusterd_defrag_info_ {
17b94a
     uint64_t total_data;
17b94a
     uint64_t num_files_lookedup;
17b94a
     uint64_t total_failures;
17b94a
+    int refcnt;
17b94a
     gf_lock_t lock;
17b94a
     int cmd;
17b94a
     pthread_t th;
17b94a
-- 
17b94a
1.8.3.1
17b94a