b7d4d7
From 854ab79dbef449c39adf66e3faebb4681359fce4 Mon Sep 17 00:00:00 2001
b7d4d7
From: mohit84 <moagrawa@redhat.com>
b7d4d7
Date: Thu, 18 Feb 2021 09:40:44 +0530
b7d4d7
Subject: [PATCH 533/538] glusterd: Rebalance cli is not showing correct status
b7d4d7
 after reboot (#2172)
b7d4d7
b7d4d7
Rebalance cli is not showing correct status after reboot.
b7d4d7
b7d4d7
The CLI is not correct status because defrag object is not
b7d4d7
valid at the time of creating a rpc connection to show the status.
b7d4d7
The defrag object is not valid because at the time of start a glusterd
b7d4d7
glusterd_restart_rebalance can be call almost at the same time by two
b7d4d7
different synctask and glusterd got a disconnect on rpc object and it
b7d4d7
cleanup the defrag object.
b7d4d7
b7d4d7
Solution: To avoid the defrag object populate a reference count before
b7d4d7
          create a defrag rpc object.
b7d4d7
>Fixes: #1339
b7d4d7
>Signed-off-by: Mohit Agrawal <moagrawa@redhat.com>
b7d4d7
>Change-Id: Ia284015d79beaa3d703ebabb92f26870a5aaafba
b7d4d7
Upstream Patch : https://github.com/gluster/glusterfs/pull/2172
b7d4d7
b7d4d7
BUG: 1832306
b7d4d7
Change-Id: Ia284015d79beaa3d703ebabb92f26870a5aaafba
b7d4d7
Signed-off-by: srijan-sivakumar <ssivakum@redhat.com>
b7d4d7
Reviewed-on: https://code.engineering.redhat.com/gerrit/228249
b7d4d7
Tested-by: RHGS Build Bot <nigelb@redhat.com>
b7d4d7
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
b7d4d7
---
b7d4d7
 xlators/mgmt/glusterd/src/glusterd-rebalance.c | 35 ++++++++++-----
b7d4d7
 xlators/mgmt/glusterd/src/glusterd-syncop.c    |  1 +
b7d4d7
 xlators/mgmt/glusterd/src/glusterd-utils.c     | 59 +++++++++++++++++++++++++-
b7d4d7
 xlators/mgmt/glusterd/src/glusterd-utils.h     |  5 +++
b7d4d7
 xlators/mgmt/glusterd/src/glusterd.h           |  1 +
b7d4d7
 5 files changed, 90 insertions(+), 11 deletions(-)
b7d4d7
b7d4d7
diff --git a/xlators/mgmt/glusterd/src/glusterd-rebalance.c b/xlators/mgmt/glusterd/src/glusterd-rebalance.c
b7d4d7
index b419a89..fcd5318 100644
b7d4d7
--- a/xlators/mgmt/glusterd/src/glusterd-rebalance.c
b7d4d7
+++ b/xlators/mgmt/glusterd/src/glusterd-rebalance.c
b7d4d7
@@ -86,6 +86,7 @@ __glusterd_defrag_notify(struct rpc_clnt *rpc, void *mydata,
b7d4d7
     glusterd_conf_t *priv = NULL;
b7d4d7
     xlator_t *this = NULL;
b7d4d7
     int pid = -1;
b7d4d7
+    int refcnt = 0;
b7d4d7
 
b7d4d7
     this = THIS;
b7d4d7
     if (!this)
b7d4d7
@@ -125,11 +126,12 @@ __glusterd_defrag_notify(struct rpc_clnt *rpc, void *mydata,
b7d4d7
         }
b7d4d7
 
b7d4d7
         case RPC_CLNT_DISCONNECT: {
b7d4d7
-            if (!defrag->connected)
b7d4d7
-                return 0;
b7d4d7
-
b7d4d7
             LOCK(&defrag->lock);
b7d4d7
             {
b7d4d7
+                if (!defrag->connected) {
b7d4d7
+                    UNLOCK(&defrag->lock);
b7d4d7
+                    return 0;
b7d4d7
+                }
b7d4d7
                 defrag->connected = 0;
b7d4d7
             }
b7d4d7
             UNLOCK(&defrag->lock);
b7d4d7
@@ -146,11 +148,11 @@ __glusterd_defrag_notify(struct rpc_clnt *rpc, void *mydata,
b7d4d7
             glusterd_defrag_rpc_put(defrag);
b7d4d7
             if (defrag->cbk_fn)
b7d4d7
                 defrag->cbk_fn(volinfo, volinfo->rebal.defrag_status);
b7d4d7
-
b7d4d7
-            GF_FREE(defrag);
b7d4d7
+            refcnt = glusterd_defrag_unref(defrag);
b7d4d7
             gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_REBALANCE_DISCONNECTED,
b7d4d7
-                   "Rebalance process for volume %s has disconnected.",
b7d4d7
-                   volinfo->volname);
b7d4d7
+                   "Rebalance process for volume %s has disconnected"
b7d4d7
+                   " and defrag refcnt is %d.",
b7d4d7
+                   volinfo->volname, refcnt);
b7d4d7
             break;
b7d4d7
         }
b7d4d7
         case RPC_CLNT_DESTROY:
b7d4d7
@@ -309,7 +311,11 @@ glusterd_handle_defrag_start(glusterd_volinfo_t *volinfo, char *op_errstr,
b7d4d7
         gf_msg_debug("glusterd", 0, "rebalance command failed");
b7d4d7
         goto out;
b7d4d7
     }
b7d4d7
-
b7d4d7
+    /* Take reference before sleep to save defrag object cleanup while
b7d4d7
+       glusterd_restart_rebalance call for other bricks by syncktask
b7d4d7
+       at the time of restart a glusterd.
b7d4d7
+    */
b7d4d7
+    glusterd_defrag_ref(defrag);
b7d4d7
     sleep(5);
b7d4d7
 
b7d4d7
     ret = glusterd_rebalance_rpc_create(volinfo);
b7d4d7
@@ -372,6 +378,7 @@ glusterd_rebalance_rpc_create(glusterd_volinfo_t *volinfo)
b7d4d7
     GF_ASSERT(this);
b7d4d7
     priv = this->private;
b7d4d7
     GF_ASSERT(priv);
b7d4d7
+    struct rpc_clnt *rpc = NULL;
b7d4d7
 
b7d4d7
     // rebalance process is not started
b7d4d7
     if (!defrag)
b7d4d7
@@ -396,13 +403,21 @@ glusterd_rebalance_rpc_create(glusterd_volinfo_t *volinfo)
b7d4d7
     }
b7d4d7
 
b7d4d7
     glusterd_volinfo_ref(volinfo);
b7d4d7
-    ret = glusterd_rpc_create(&defrag->rpc, options, glusterd_defrag_notify,
b7d4d7
-                              volinfo, _gf_true);
b7d4d7
+    ret = glusterd_rpc_create(&rpc, options, glusterd_defrag_notify, volinfo,
b7d4d7
+                              _gf_false);
b7d4d7
     if (ret) {
b7d4d7
         gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_RPC_CREATE_FAIL,
b7d4d7
                "Glusterd RPC creation failed");
b7d4d7
         goto out;
b7d4d7
     }
b7d4d7
+    LOCK(&defrag->lock);
b7d4d7
+    {
b7d4d7
+        if (!defrag->rpc)
b7d4d7
+            defrag->rpc = rpc;
b7d4d7
+        else
b7d4d7
+            rpc_clnt_unref(rpc);
b7d4d7
+    }
b7d4d7
+    UNLOCK(&defrag->lock);
b7d4d7
     ret = 0;
b7d4d7
 out:
b7d4d7
     if (options)
b7d4d7
diff --git a/xlators/mgmt/glusterd/src/glusterd-syncop.c b/xlators/mgmt/glusterd/src/glusterd-syncop.c
b7d4d7
index df78fef..05c9e11 100644
b7d4d7
--- a/xlators/mgmt/glusterd/src/glusterd-syncop.c
b7d4d7
+++ b/xlators/mgmt/glusterd/src/glusterd-syncop.c
b7d4d7
@@ -1732,6 +1732,7 @@ gd_brick_op_phase(glusterd_op_t op, dict_t *op_ctx, dict_t *req_dict,
b7d4d7
         if (!rpc) {
b7d4d7
             if (pending_node->type == GD_NODE_REBALANCE && pending_node->node) {
b7d4d7
                 volinfo = pending_node->node;
b7d4d7
+                glusterd_defrag_ref(volinfo->rebal.defrag);
b7d4d7
                 ret = glusterd_rebalance_rpc_create(volinfo);
b7d4d7
                 if (ret) {
b7d4d7
                     ret = 0;
b7d4d7
diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c
b7d4d7
index bc188a2..9fb8eab 100644
b7d4d7
--- a/xlators/mgmt/glusterd/src/glusterd-utils.c
b7d4d7
+++ b/xlators/mgmt/glusterd/src/glusterd-utils.c
b7d4d7
@@ -93,6 +93,44 @@
b7d4d7
 #define NLMV4_VERSION 4
b7d4d7
 #define NLMV1_VERSION 1
b7d4d7
 
b7d4d7
+int
b7d4d7
+glusterd_defrag_ref(glusterd_defrag_info_t *defrag)
b7d4d7
+{
b7d4d7
+    int refcnt = 0;
b7d4d7
+
b7d4d7
+    if (!defrag)
b7d4d7
+        goto out;
b7d4d7
+
b7d4d7
+    LOCK(&defrag->lock);
b7d4d7
+    {
b7d4d7
+        refcnt = ++defrag->refcnt;
b7d4d7
+    }
b7d4d7
+    UNLOCK(&defrag->lock);
b7d4d7
+
b7d4d7
+out:
b7d4d7
+    return refcnt;
b7d4d7
+}
b7d4d7
+
b7d4d7
+int
b7d4d7
+glusterd_defrag_unref(glusterd_defrag_info_t *defrag)
b7d4d7
+{
b7d4d7
+    int refcnt = -1;
b7d4d7
+
b7d4d7
+    if (!defrag)
b7d4d7
+        goto out;
b7d4d7
+
b7d4d7
+    LOCK(&defrag->lock);
b7d4d7
+    {
b7d4d7
+        refcnt = --defrag->refcnt;
b7d4d7
+        if (refcnt <= 0)
b7d4d7
+            GF_FREE(defrag);
b7d4d7
+    }
b7d4d7
+    UNLOCK(&defrag->lock);
b7d4d7
+
b7d4d7
+out:
b7d4d7
+    return refcnt;
b7d4d7
+}
b7d4d7
+
b7d4d7
 gf_boolean_t
b7d4d7
 is_brick_mx_enabled(void)
b7d4d7
 {
b7d4d7
@@ -9370,6 +9408,7 @@ glusterd_volume_defrag_restart(glusterd_volinfo_t *volinfo, char *op_errstr,
b7d4d7
     char pidfile[PATH_MAX] = "";
b7d4d7
     int ret = -1;
b7d4d7
     pid_t pid = 0;
b7d4d7
+    int refcnt = 0;
b7d4d7
 
b7d4d7
     this = THIS;
b7d4d7
     GF_ASSERT(this);
b7d4d7
@@ -9410,7 +9449,25 @@ glusterd_volume_defrag_restart(glusterd_volinfo_t *volinfo, char *op_errstr,
b7d4d7
                              volinfo->volname);
b7d4d7
                     goto out;
b7d4d7
                 }
b7d4d7
-                ret = glusterd_rebalance_rpc_create(volinfo);
b7d4d7
+                refcnt = glusterd_defrag_ref(volinfo->rebal.defrag);
b7d4d7
+                /* If refcnt value is 1 it means either defrag object is
b7d4d7
+                   poulated by glusterd_rebalance_defrag_init or previous
b7d4d7
+                   rpc creation was failed.If it is not 1 it means it(defrag)
b7d4d7
+                   was populated at the time of start a rebalance daemon.
b7d4d7
+                   We need to create a rpc object only while a previous
b7d4d7
+                   rpc connection was not established successfully at the
b7d4d7
+                   time of restart a rebalance daemon by
b7d4d7
+                   glusterd_handle_defrag_start otherwise rebalance cli
b7d4d7
+                   does not show correct status after just reboot a node and try
b7d4d7
+                   to print the rebalance status because defrag object has been
b7d4d7
+                   destroyed during handling of rpc disconnect.
b7d4d7
+                */
b7d4d7
+                if (refcnt == 1) {
b7d4d7
+                    ret = glusterd_rebalance_rpc_create(volinfo);
b7d4d7
+                } else {
b7d4d7
+                    ret = 0;
b7d4d7
+                    glusterd_defrag_unref(volinfo->rebal.defrag);
b7d4d7
+                }
b7d4d7
                 break;
b7d4d7
             }
b7d4d7
         case GF_DEFRAG_STATUS_NOT_STARTED:
b7d4d7
diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.h b/xlators/mgmt/glusterd/src/glusterd-utils.h
b7d4d7
index 02d85d2..4541471 100644
b7d4d7
--- a/xlators/mgmt/glusterd/src/glusterd-utils.h
b7d4d7
+++ b/xlators/mgmt/glusterd/src/glusterd-utils.h
b7d4d7
@@ -886,4 +886,9 @@ int32_t
b7d4d7
 glusterd_check_brick_order(dict_t *dict, char *err_str, int32_t type,
b7d4d7
                            int32_t sub_count);
b7d4d7
 
b7d4d7
+int
b7d4d7
+glusterd_defrag_ref(glusterd_defrag_info_t *defrag);
b7d4d7
+
b7d4d7
+int
b7d4d7
+glusterd_defrag_unref(glusterd_defrag_info_t *defrag);
b7d4d7
 #endif
b7d4d7
diff --git a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h
b7d4d7
index efe4d0e..9de3f28 100644
b7d4d7
--- a/xlators/mgmt/glusterd/src/glusterd.h
b7d4d7
+++ b/xlators/mgmt/glusterd/src/glusterd.h
b7d4d7
@@ -321,6 +321,7 @@ struct glusterd_defrag_info_ {
b7d4d7
     uint64_t total_data;
b7d4d7
     uint64_t num_files_lookedup;
b7d4d7
     uint64_t total_failures;
b7d4d7
+    int refcnt;
b7d4d7
     gf_lock_t lock;
b7d4d7
     int cmd;
b7d4d7
     pthread_t th;
b7d4d7
-- 
b7d4d7
1.8.3.1
b7d4d7