|
|
74096c |
From 854ab79dbef449c39adf66e3faebb4681359fce4 Mon Sep 17 00:00:00 2001
|
|
|
74096c |
From: mohit84 <moagrawa@redhat.com>
|
|
|
74096c |
Date: Thu, 18 Feb 2021 09:40:44 +0530
|
|
|
74096c |
Subject: [PATCH 533/538] glusterd: Rebalance cli is not showing correct status
|
|
|
74096c |
after reboot (#2172)
|
|
|
74096c |
|
|
|
74096c |
Rebalance cli is not showing correct status after reboot.
|
|
|
74096c |
|
|
|
74096c |
The CLI is not correct status because defrag object is not
|
|
|
74096c |
valid at the time of creating a rpc connection to show the status.
|
|
|
74096c |
The defrag object is not valid because at the time of start a glusterd
|
|
|
74096c |
glusterd_restart_rebalance can be call almost at the same time by two
|
|
|
74096c |
different synctask and glusterd got a disconnect on rpc object and it
|
|
|
74096c |
cleanup the defrag object.
|
|
|
74096c |
|
|
|
74096c |
Solution: To avoid the defrag object populate a reference count before
|
|
|
74096c |
create a defrag rpc object.
|
|
|
74096c |
>Fixes: #1339
|
|
|
74096c |
>Signed-off-by: Mohit Agrawal <moagrawa@redhat.com>
|
|
|
74096c |
>Change-Id: Ia284015d79beaa3d703ebabb92f26870a5aaafba
|
|
|
74096c |
Upstream Patch : https://github.com/gluster/glusterfs/pull/2172
|
|
|
74096c |
|
|
|
74096c |
BUG: 1832306
|
|
|
74096c |
Change-Id: Ia284015d79beaa3d703ebabb92f26870a5aaafba
|
|
|
74096c |
Signed-off-by: srijan-sivakumar <ssivakum@redhat.com>
|
|
|
74096c |
Reviewed-on: https://code.engineering.redhat.com/gerrit/228249
|
|
|
74096c |
Tested-by: RHGS Build Bot <nigelb@redhat.com>
|
|
|
74096c |
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
|
|
|
74096c |
---
|
|
|
74096c |
xlators/mgmt/glusterd/src/glusterd-rebalance.c | 35 ++++++++++-----
|
|
|
74096c |
xlators/mgmt/glusterd/src/glusterd-syncop.c | 1 +
|
|
|
74096c |
xlators/mgmt/glusterd/src/glusterd-utils.c | 59 +++++++++++++++++++++++++-
|
|
|
74096c |
xlators/mgmt/glusterd/src/glusterd-utils.h | 5 +++
|
|
|
74096c |
xlators/mgmt/glusterd/src/glusterd.h | 1 +
|
|
|
74096c |
5 files changed, 90 insertions(+), 11 deletions(-)
|
|
|
74096c |
|
|
|
74096c |
diff --git a/xlators/mgmt/glusterd/src/glusterd-rebalance.c b/xlators/mgmt/glusterd/src/glusterd-rebalance.c
|
|
|
74096c |
index b419a89..fcd5318 100644
|
|
|
74096c |
--- a/xlators/mgmt/glusterd/src/glusterd-rebalance.c
|
|
|
74096c |
+++ b/xlators/mgmt/glusterd/src/glusterd-rebalance.c
|
|
|
74096c |
@@ -86,6 +86,7 @@ __glusterd_defrag_notify(struct rpc_clnt *rpc, void *mydata,
|
|
|
74096c |
glusterd_conf_t *priv = NULL;
|
|
|
74096c |
xlator_t *this = NULL;
|
|
|
74096c |
int pid = -1;
|
|
|
74096c |
+ int refcnt = 0;
|
|
|
74096c |
|
|
|
74096c |
this = THIS;
|
|
|
74096c |
if (!this)
|
|
|
74096c |
@@ -125,11 +126,12 @@ __glusterd_defrag_notify(struct rpc_clnt *rpc, void *mydata,
|
|
|
74096c |
}
|
|
|
74096c |
|
|
|
74096c |
case RPC_CLNT_DISCONNECT: {
|
|
|
74096c |
- if (!defrag->connected)
|
|
|
74096c |
- return 0;
|
|
|
74096c |
-
|
|
|
74096c |
LOCK(&defrag->lock);
|
|
|
74096c |
{
|
|
|
74096c |
+ if (!defrag->connected) {
|
|
|
74096c |
+ UNLOCK(&defrag->lock);
|
|
|
74096c |
+ return 0;
|
|
|
74096c |
+ }
|
|
|
74096c |
defrag->connected = 0;
|
|
|
74096c |
}
|
|
|
74096c |
UNLOCK(&defrag->lock);
|
|
|
74096c |
@@ -146,11 +148,11 @@ __glusterd_defrag_notify(struct rpc_clnt *rpc, void *mydata,
|
|
|
74096c |
glusterd_defrag_rpc_put(defrag);
|
|
|
74096c |
if (defrag->cbk_fn)
|
|
|
74096c |
defrag->cbk_fn(volinfo, volinfo->rebal.defrag_status);
|
|
|
74096c |
-
|
|
|
74096c |
- GF_FREE(defrag);
|
|
|
74096c |
+ refcnt = glusterd_defrag_unref(defrag);
|
|
|
74096c |
gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_REBALANCE_DISCONNECTED,
|
|
|
74096c |
- "Rebalance process for volume %s has disconnected.",
|
|
|
74096c |
- volinfo->volname);
|
|
|
74096c |
+ "Rebalance process for volume %s has disconnected"
|
|
|
74096c |
+ " and defrag refcnt is %d.",
|
|
|
74096c |
+ volinfo->volname, refcnt);
|
|
|
74096c |
break;
|
|
|
74096c |
}
|
|
|
74096c |
case RPC_CLNT_DESTROY:
|
|
|
74096c |
@@ -309,7 +311,11 @@ glusterd_handle_defrag_start(glusterd_volinfo_t *volinfo, char *op_errstr,
|
|
|
74096c |
gf_msg_debug("glusterd", 0, "rebalance command failed");
|
|
|
74096c |
goto out;
|
|
|
74096c |
}
|
|
|
74096c |
-
|
|
|
74096c |
+ /* Take reference before sleep to save defrag object cleanup while
|
|
|
74096c |
+ glusterd_restart_rebalance call for other bricks by syncktask
|
|
|
74096c |
+ at the time of restart a glusterd.
|
|
|
74096c |
+ */
|
|
|
74096c |
+ glusterd_defrag_ref(defrag);
|
|
|
74096c |
sleep(5);
|
|
|
74096c |
|
|
|
74096c |
ret = glusterd_rebalance_rpc_create(volinfo);
|
|
|
74096c |
@@ -372,6 +378,7 @@ glusterd_rebalance_rpc_create(glusterd_volinfo_t *volinfo)
|
|
|
74096c |
GF_ASSERT(this);
|
|
|
74096c |
priv = this->private;
|
|
|
74096c |
GF_ASSERT(priv);
|
|
|
74096c |
+ struct rpc_clnt *rpc = NULL;
|
|
|
74096c |
|
|
|
74096c |
// rebalance process is not started
|
|
|
74096c |
if (!defrag)
|
|
|
74096c |
@@ -396,13 +403,21 @@ glusterd_rebalance_rpc_create(glusterd_volinfo_t *volinfo)
|
|
|
74096c |
}
|
|
|
74096c |
|
|
|
74096c |
glusterd_volinfo_ref(volinfo);
|
|
|
74096c |
- ret = glusterd_rpc_create(&defrag->rpc, options, glusterd_defrag_notify,
|
|
|
74096c |
- volinfo, _gf_true);
|
|
|
74096c |
+ ret = glusterd_rpc_create(&rpc, options, glusterd_defrag_notify, volinfo,
|
|
|
74096c |
+ _gf_false);
|
|
|
74096c |
if (ret) {
|
|
|
74096c |
gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_RPC_CREATE_FAIL,
|
|
|
74096c |
"Glusterd RPC creation failed");
|
|
|
74096c |
goto out;
|
|
|
74096c |
}
|
|
|
74096c |
+ LOCK(&defrag->lock);
|
|
|
74096c |
+ {
|
|
|
74096c |
+ if (!defrag->rpc)
|
|
|
74096c |
+ defrag->rpc = rpc;
|
|
|
74096c |
+ else
|
|
|
74096c |
+ rpc_clnt_unref(rpc);
|
|
|
74096c |
+ }
|
|
|
74096c |
+ UNLOCK(&defrag->lock);
|
|
|
74096c |
ret = 0;
|
|
|
74096c |
out:
|
|
|
74096c |
if (options)
|
|
|
74096c |
diff --git a/xlators/mgmt/glusterd/src/glusterd-syncop.c b/xlators/mgmt/glusterd/src/glusterd-syncop.c
|
|
|
74096c |
index df78fef..05c9e11 100644
|
|
|
74096c |
--- a/xlators/mgmt/glusterd/src/glusterd-syncop.c
|
|
|
74096c |
+++ b/xlators/mgmt/glusterd/src/glusterd-syncop.c
|
|
|
74096c |
@@ -1732,6 +1732,7 @@ gd_brick_op_phase(glusterd_op_t op, dict_t *op_ctx, dict_t *req_dict,
|
|
|
74096c |
if (!rpc) {
|
|
|
74096c |
if (pending_node->type == GD_NODE_REBALANCE && pending_node->node) {
|
|
|
74096c |
volinfo = pending_node->node;
|
|
|
74096c |
+ glusterd_defrag_ref(volinfo->rebal.defrag);
|
|
|
74096c |
ret = glusterd_rebalance_rpc_create(volinfo);
|
|
|
74096c |
if (ret) {
|
|
|
74096c |
ret = 0;
|
|
|
74096c |
diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c
|
|
|
74096c |
index bc188a2..9fb8eab 100644
|
|
|
74096c |
--- a/xlators/mgmt/glusterd/src/glusterd-utils.c
|
|
|
74096c |
+++ b/xlators/mgmt/glusterd/src/glusterd-utils.c
|
|
|
74096c |
@@ -93,6 +93,44 @@
|
|
|
74096c |
#define NLMV4_VERSION 4
|
|
|
74096c |
#define NLMV1_VERSION 1
|
|
|
74096c |
|
|
|
74096c |
+int
|
|
|
74096c |
+glusterd_defrag_ref(glusterd_defrag_info_t *defrag)
|
|
|
74096c |
+{
|
|
|
74096c |
+ int refcnt = 0;
|
|
|
74096c |
+
|
|
|
74096c |
+ if (!defrag)
|
|
|
74096c |
+ goto out;
|
|
|
74096c |
+
|
|
|
74096c |
+ LOCK(&defrag->lock);
|
|
|
74096c |
+ {
|
|
|
74096c |
+ refcnt = ++defrag->refcnt;
|
|
|
74096c |
+ }
|
|
|
74096c |
+ UNLOCK(&defrag->lock);
|
|
|
74096c |
+
|
|
|
74096c |
+out:
|
|
|
74096c |
+ return refcnt;
|
|
|
74096c |
+}
|
|
|
74096c |
+
|
|
|
74096c |
+int
|
|
|
74096c |
+glusterd_defrag_unref(glusterd_defrag_info_t *defrag)
|
|
|
74096c |
+{
|
|
|
74096c |
+ int refcnt = -1;
|
|
|
74096c |
+
|
|
|
74096c |
+ if (!defrag)
|
|
|
74096c |
+ goto out;
|
|
|
74096c |
+
|
|
|
74096c |
+ LOCK(&defrag->lock);
|
|
|
74096c |
+ {
|
|
|
74096c |
+ refcnt = --defrag->refcnt;
|
|
|
74096c |
+ if (refcnt <= 0)
|
|
|
74096c |
+ GF_FREE(defrag);
|
|
|
74096c |
+ }
|
|
|
74096c |
+ UNLOCK(&defrag->lock);
|
|
|
74096c |
+
|
|
|
74096c |
+out:
|
|
|
74096c |
+ return refcnt;
|
|
|
74096c |
+}
|
|
|
74096c |
+
|
|
|
74096c |
gf_boolean_t
|
|
|
74096c |
is_brick_mx_enabled(void)
|
|
|
74096c |
{
|
|
|
74096c |
@@ -9370,6 +9408,7 @@ glusterd_volume_defrag_restart(glusterd_volinfo_t *volinfo, char *op_errstr,
|
|
|
74096c |
char pidfile[PATH_MAX] = "";
|
|
|
74096c |
int ret = -1;
|
|
|
74096c |
pid_t pid = 0;
|
|
|
74096c |
+ int refcnt = 0;
|
|
|
74096c |
|
|
|
74096c |
this = THIS;
|
|
|
74096c |
GF_ASSERT(this);
|
|
|
74096c |
@@ -9410,7 +9449,25 @@ glusterd_volume_defrag_restart(glusterd_volinfo_t *volinfo, char *op_errstr,
|
|
|
74096c |
volinfo->volname);
|
|
|
74096c |
goto out;
|
|
|
74096c |
}
|
|
|
74096c |
- ret = glusterd_rebalance_rpc_create(volinfo);
|
|
|
74096c |
+ refcnt = glusterd_defrag_ref(volinfo->rebal.defrag);
|
|
|
74096c |
+ /* If refcnt value is 1 it means either defrag object is
|
|
|
74096c |
+ poulated by glusterd_rebalance_defrag_init or previous
|
|
|
74096c |
+ rpc creation was failed.If it is not 1 it means it(defrag)
|
|
|
74096c |
+ was populated at the time of start a rebalance daemon.
|
|
|
74096c |
+ We need to create a rpc object only while a previous
|
|
|
74096c |
+ rpc connection was not established successfully at the
|
|
|
74096c |
+ time of restart a rebalance daemon by
|
|
|
74096c |
+ glusterd_handle_defrag_start otherwise rebalance cli
|
|
|
74096c |
+ does not show correct status after just reboot a node and try
|
|
|
74096c |
+ to print the rebalance status because defrag object has been
|
|
|
74096c |
+ destroyed during handling of rpc disconnect.
|
|
|
74096c |
+ */
|
|
|
74096c |
+ if (refcnt == 1) {
|
|
|
74096c |
+ ret = glusterd_rebalance_rpc_create(volinfo);
|
|
|
74096c |
+ } else {
|
|
|
74096c |
+ ret = 0;
|
|
|
74096c |
+ glusterd_defrag_unref(volinfo->rebal.defrag);
|
|
|
74096c |
+ }
|
|
|
74096c |
break;
|
|
|
74096c |
}
|
|
|
74096c |
case GF_DEFRAG_STATUS_NOT_STARTED:
|
|
|
74096c |
diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.h b/xlators/mgmt/glusterd/src/glusterd-utils.h
|
|
|
74096c |
index 02d85d2..4541471 100644
|
|
|
74096c |
--- a/xlators/mgmt/glusterd/src/glusterd-utils.h
|
|
|
74096c |
+++ b/xlators/mgmt/glusterd/src/glusterd-utils.h
|
|
|
74096c |
@@ -886,4 +886,9 @@ int32_t
|
|
|
74096c |
glusterd_check_brick_order(dict_t *dict, char *err_str, int32_t type,
|
|
|
74096c |
int32_t sub_count);
|
|
|
74096c |
|
|
|
74096c |
+int
|
|
|
74096c |
+glusterd_defrag_ref(glusterd_defrag_info_t *defrag);
|
|
|
74096c |
+
|
|
|
74096c |
+int
|
|
|
74096c |
+glusterd_defrag_unref(glusterd_defrag_info_t *defrag);
|
|
|
74096c |
#endif
|
|
|
74096c |
diff --git a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h
|
|
|
74096c |
index efe4d0e..9de3f28 100644
|
|
|
74096c |
--- a/xlators/mgmt/glusterd/src/glusterd.h
|
|
|
74096c |
+++ b/xlators/mgmt/glusterd/src/glusterd.h
|
|
|
74096c |
@@ -321,6 +321,7 @@ struct glusterd_defrag_info_ {
|
|
|
74096c |
uint64_t total_data;
|
|
|
74096c |
uint64_t num_files_lookedup;
|
|
|
74096c |
uint64_t total_failures;
|
|
|
74096c |
+ int refcnt;
|
|
|
74096c |
gf_lock_t lock;
|
|
|
74096c |
int cmd;
|
|
|
74096c |
pthread_t th;
|
|
|
74096c |
--
|
|
|
74096c |
1.8.3.1
|
|
|
74096c |
|