|
|
17b94a |
From bf24623765817ede84ea47f3265f5e6c2ae17ee7 Mon Sep 17 00:00:00 2001
|
|
|
17b94a |
From: Mohit Agrawal <moagrawal@redhat.com>
|
|
|
17b94a |
Date: Tue, 16 Jul 2019 20:36:57 +0530
|
|
|
17b94a |
Subject: [PATCH 279/284] posix: In brick_mux brick is crashed while start/stop
|
|
|
17b94a |
volume in loop
|
|
|
17b94a |
|
|
|
17b94a |
Problem: In brick_mux environment sometime brick is crashed while
|
|
|
17b94a |
volume stop/start in a loop.Brick is crashed in janitor task
|
|
|
17b94a |
at the time of accessing priv.If posix priv is cleaned up before
|
|
|
17b94a |
call janitor task then janitor task is crashed.
|
|
|
17b94a |
|
|
|
17b94a |
Solution: To avoid the crash in brick_mux environment introduce a new
|
|
|
17b94a |
flag janitor_task_stop in posix_private and before send CHILD_DOWN event
|
|
|
17b94a |
wait for update the flag by janitor_task_done
|
|
|
17b94a |
|
|
|
17b94a |
> Change-Id: Id9fa5d183a463b2b682774ab5cb9868357d139a4
|
|
|
17b94a |
> fixes: bz#1730409
|
|
|
17b94a |
> Signed-off-by: Mohit Agrawal <moagrawal@redhat.com>
|
|
|
17b94a |
> (Cherry picked from commit f138d3fa2237e7fa940ecf17153fd700350c4138)
|
|
|
17b94a |
> (Reviewed on upstream link https://review.gluster.org/#/c/glusterfs/+/23060/)
|
|
|
17b94a |
|
|
|
17b94a |
Change-Id: Id9fa5d183a463b2b682774ab5cb9868357d139a4
|
|
|
17b94a |
fixex: bz#1729971
|
|
|
17b94a |
Signed-off-by: Mohit Agrawal <moagrawal@redhat.com>
|
|
|
17b94a |
Reviewed-on: https://code.engineering.redhat.com/gerrit/178934
|
|
|
17b94a |
Tested-by: Mohit Agrawal <moagrawa@redhat.com>
|
|
|
17b94a |
Tested-by: RHGS Build Bot <nigelb@redhat.com>
|
|
|
17b94a |
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
|
|
|
17b94a |
---
|
|
|
17b94a |
libglusterfs/src/glusterfs/xlator.h | 3 +++
|
|
|
17b94a |
xlators/mgmt/glusterd/src/glusterd-utils.c | 5 ++--
|
|
|
17b94a |
xlators/protocol/server/src/server.c | 6 ++++-
|
|
|
17b94a |
xlators/storage/posix/src/posix-common.c | 40 +++++++++++++++++++++++++++++-
|
|
|
17b94a |
xlators/storage/posix/src/posix-helpers.c | 16 ++++++++++++
|
|
|
17b94a |
xlators/storage/posix/src/posix.h | 3 +++
|
|
|
17b94a |
6 files changed, 69 insertions(+), 4 deletions(-)
|
|
|
17b94a |
|
|
|
17b94a |
diff --git a/libglusterfs/src/glusterfs/xlator.h b/libglusterfs/src/glusterfs/xlator.h
|
|
|
17b94a |
index b78daad..da551e9 100644
|
|
|
17b94a |
--- a/libglusterfs/src/glusterfs/xlator.h
|
|
|
17b94a |
+++ b/libglusterfs/src/glusterfs/xlator.h
|
|
|
17b94a |
@@ -861,6 +861,9 @@ struct _xlator {
|
|
|
17b94a |
|
|
|
17b94a |
/* Flag to notify got CHILD_DOWN event for detach brick */
|
|
|
17b94a |
uint32_t notify_down;
|
|
|
17b94a |
+
|
|
|
17b94a |
+ /* Flag to avoid throw duplicate PARENT_DOWN event */
|
|
|
17b94a |
+ uint32_t parent_down;
|
|
|
17b94a |
};
|
|
|
17b94a |
|
|
|
17b94a |
/* This would be the only structure which needs to be exported by
|
|
|
17b94a |
diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c
|
|
|
17b94a |
index 2aa975b..812c698 100644
|
|
|
17b94a |
--- a/xlators/mgmt/glusterd/src/glusterd-utils.c
|
|
|
17b94a |
+++ b/xlators/mgmt/glusterd/src/glusterd-utils.c
|
|
|
17b94a |
@@ -4082,8 +4082,9 @@ out:
|
|
|
17b94a |
if (msg[0]) {
|
|
|
17b94a |
gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_BRICK_IMPORT_FAIL, "%s",
|
|
|
17b94a |
msg);
|
|
|
17b94a |
- gf_event(EVENT_IMPORT_BRICK_FAILED, "peer=%s;brick=%s",
|
|
|
17b94a |
- new_brickinfo->hostname, new_brickinfo->path);
|
|
|
17b94a |
+ if (new_brickinfo)
|
|
|
17b94a |
+ gf_event(EVENT_IMPORT_BRICK_FAILED, "peer=%s;brick=%s",
|
|
|
17b94a |
+ new_brickinfo->hostname, new_brickinfo->path);
|
|
|
17b94a |
}
|
|
|
17b94a |
gf_msg_debug("glusterd", 0, "Returning with %d", ret);
|
|
|
17b94a |
return ret;
|
|
|
17b94a |
diff --git a/xlators/protocol/server/src/server.c b/xlators/protocol/server/src/server.c
|
|
|
17b94a |
index 6ae63ba..a5f09fe 100644
|
|
|
17b94a |
--- a/xlators/protocol/server/src/server.c
|
|
|
17b94a |
+++ b/xlators/protocol/server/src/server.c
|
|
|
17b94a |
@@ -580,6 +580,7 @@ server_graph_janitor_threads(void *data)
|
|
|
17b94a |
gf_boolean_t victim_found = _gf_false;
|
|
|
17b94a |
xlator_list_t **trav_p = NULL;
|
|
|
17b94a |
xlator_t *top = NULL;
|
|
|
17b94a |
+ uint32_t parent_down = 0;
|
|
|
17b94a |
|
|
|
17b94a |
GF_ASSERT(data);
|
|
|
17b94a |
|
|
|
17b94a |
@@ -598,7 +599,10 @@ server_graph_janitor_threads(void *data)
|
|
|
17b94a |
victim = (*trav_p)->xlator;
|
|
|
17b94a |
if (victim->cleanup_starting &&
|
|
|
17b94a |
strcmp(victim->name, victim_name) == 0) {
|
|
|
17b94a |
- victim_found = _gf_true;
|
|
|
17b94a |
+ parent_down = victim->parent_down;
|
|
|
17b94a |
+ victim->parent_down = 1;
|
|
|
17b94a |
+ if (!parent_down)
|
|
|
17b94a |
+ victim_found = _gf_true;
|
|
|
17b94a |
break;
|
|
|
17b94a |
}
|
|
|
17b94a |
}
|
|
|
17b94a |
diff --git a/xlators/storage/posix/src/posix-common.c b/xlators/storage/posix/src/posix-common.c
|
|
|
17b94a |
index d738692..69857d9 100644
|
|
|
17b94a |
--- a/xlators/storage/posix/src/posix-common.c
|
|
|
17b94a |
+++ b/xlators/storage/posix/src/posix-common.c
|
|
|
17b94a |
@@ -146,10 +146,15 @@ int32_t
|
|
|
17b94a |
posix_notify(xlator_t *this, int32_t event, void *data, ...)
|
|
|
17b94a |
{
|
|
|
17b94a |
xlator_t *victim = data;
|
|
|
17b94a |
+ struct posix_private *priv = this->private;
|
|
|
17b94a |
+ int ret = 0;
|
|
|
17b94a |
+ struct timespec sleep_till = {
|
|
|
17b94a |
+ 0,
|
|
|
17b94a |
+ };
|
|
|
17b94a |
|
|
|
17b94a |
switch (event) {
|
|
|
17b94a |
case GF_EVENT_PARENT_UP: {
|
|
|
17b94a |
- /* Tell the parent that posix xlator is up */
|
|
|
17b94a |
+ /* the parent that posix xlator is up */
|
|
|
17b94a |
default_notify(this, GF_EVENT_CHILD_UP, data);
|
|
|
17b94a |
} break;
|
|
|
17b94a |
|
|
|
17b94a |
@@ -158,6 +163,31 @@ posix_notify(xlator_t *this, int32_t event, void *data, ...)
|
|
|
17b94a |
break;
|
|
|
17b94a |
gf_log(this->name, GF_LOG_INFO, "Sending CHILD_DOWN for brick %s",
|
|
|
17b94a |
victim->name);
|
|
|
17b94a |
+
|
|
|
17b94a |
+ if (priv->janitor) {
|
|
|
17b94a |
+ pthread_mutex_lock(&priv->janitor_mutex);
|
|
|
17b94a |
+ {
|
|
|
17b94a |
+ priv->janitor_task_stop = _gf_true;
|
|
|
17b94a |
+ ret = gf_tw_del_timer(this->ctx->tw->timer_wheel,
|
|
|
17b94a |
+ priv->janitor);
|
|
|
17b94a |
+ if (!ret) {
|
|
|
17b94a |
+ clock_gettime(CLOCK_REALTIME, &sleep_till);
|
|
|
17b94a |
+ sleep_till.tv_sec += 1;
|
|
|
17b94a |
+ /* Wait to set janitor_task flag to _gf_false by
|
|
|
17b94a |
+ * janitor_task_done */
|
|
|
17b94a |
+ while (priv->janitor_task_stop) {
|
|
|
17b94a |
+ (void)pthread_cond_timedwait(&priv->janitor_cond,
|
|
|
17b94a |
+ &priv->janitor_mutex,
|
|
|
17b94a |
+ &sleep_till);
|
|
|
17b94a |
+ clock_gettime(CLOCK_REALTIME, &sleep_till);
|
|
|
17b94a |
+ sleep_till.tv_sec += 1;
|
|
|
17b94a |
+ }
|
|
|
17b94a |
+ }
|
|
|
17b94a |
+ }
|
|
|
17b94a |
+ pthread_mutex_unlock(&priv->janitor_mutex);
|
|
|
17b94a |
+ GF_FREE(priv->janitor);
|
|
|
17b94a |
+ }
|
|
|
17b94a |
+ priv->janitor = NULL;
|
|
|
17b94a |
default_notify(this->parents->xlator, GF_EVENT_CHILD_DOWN, data);
|
|
|
17b94a |
} break;
|
|
|
17b94a |
default:
|
|
|
17b94a |
@@ -1008,6 +1038,8 @@ posix_init(xlator_t *this)
|
|
|
17b94a |
|
|
|
17b94a |
pthread_mutex_init(&_private->fsync_mutex, NULL);
|
|
|
17b94a |
pthread_cond_init(&_private->fsync_cond, NULL);
|
|
|
17b94a |
+ pthread_mutex_init(&_private->janitor_mutex, NULL);
|
|
|
17b94a |
+ pthread_cond_init(&_private->janitor_cond, NULL);
|
|
|
17b94a |
INIT_LIST_HEAD(&_private->fsyncs);
|
|
|
17b94a |
ret = posix_spawn_ctx_janitor_thread(this);
|
|
|
17b94a |
if (ret)
|
|
|
17b94a |
@@ -1128,6 +1160,7 @@ posix_fini(xlator_t *this)
|
|
|
17b94a |
(void)gf_thread_cleanup_xint(priv->disk_space_check);
|
|
|
17b94a |
priv->disk_space_check = 0;
|
|
|
17b94a |
}
|
|
|
17b94a |
+
|
|
|
17b94a |
if (priv->janitor) {
|
|
|
17b94a |
/*TODO: Make sure the synctask is also complete */
|
|
|
17b94a |
ret = gf_tw_del_timer(this->ctx->tw->timer_wheel, priv->janitor);
|
|
|
17b94a |
@@ -1135,8 +1168,10 @@ posix_fini(xlator_t *this)
|
|
|
17b94a |
gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_TIMER_DELETE_FAILED,
|
|
|
17b94a |
"Failed to delete janitor timer");
|
|
|
17b94a |
}
|
|
|
17b94a |
+ GF_FREE(priv->janitor);
|
|
|
17b94a |
priv->janitor = NULL;
|
|
|
17b94a |
}
|
|
|
17b94a |
+
|
|
|
17b94a |
if (priv->fsyncer) {
|
|
|
17b94a |
(void)gf_thread_cleanup_xint(priv->fsyncer);
|
|
|
17b94a |
priv->fsyncer = 0;
|
|
|
17b94a |
@@ -1148,6 +1183,9 @@ posix_fini(xlator_t *this)
|
|
|
17b94a |
GF_FREE(priv->base_path);
|
|
|
17b94a |
LOCK_DESTROY(&priv->lock);
|
|
|
17b94a |
pthread_mutex_destroy(&priv->fsync_mutex);
|
|
|
17b94a |
+ pthread_cond_destroy(&priv->fsync_cond);
|
|
|
17b94a |
+ pthread_mutex_destroy(&priv->janitor_mutex);
|
|
|
17b94a |
+ pthread_cond_destroy(&priv->janitor_cond);
|
|
|
17b94a |
GF_FREE(priv->hostname);
|
|
|
17b94a |
GF_FREE(priv->trash_path);
|
|
|
17b94a |
GF_FREE(priv);
|
|
|
17b94a |
diff --git a/xlators/storage/posix/src/posix-helpers.c b/xlators/storage/posix/src/posix-helpers.c
|
|
|
17b94a |
index 07169b5..ef5bfd5 100644
|
|
|
17b94a |
--- a/xlators/storage/posix/src/posix-helpers.c
|
|
|
17b94a |
+++ b/xlators/storage/posix/src/posix-helpers.c
|
|
|
17b94a |
@@ -1432,12 +1432,24 @@ posix_janitor_task_done(int ret, call_frame_t *frame, void *data)
|
|
|
17b94a |
this = data;
|
|
|
17b94a |
priv = this->private;
|
|
|
17b94a |
|
|
|
17b94a |
+ pthread_mutex_lock(&priv->janitor_mutex);
|
|
|
17b94a |
+ {
|
|
|
17b94a |
+ if (priv->janitor_task_stop) {
|
|
|
17b94a |
+ priv->janitor_task_stop = _gf_false;
|
|
|
17b94a |
+ pthread_cond_signal(&priv->janitor_cond);
|
|
|
17b94a |
+ pthread_mutex_unlock(&priv->janitor_mutex);
|
|
|
17b94a |
+ goto out;
|
|
|
17b94a |
+ }
|
|
|
17b94a |
+ }
|
|
|
17b94a |
+ pthread_mutex_unlock(&priv->janitor_mutex);
|
|
|
17b94a |
+
|
|
|
17b94a |
LOCK(&priv->lock);
|
|
|
17b94a |
{
|
|
|
17b94a |
__posix_janitor_timer_start(this);
|
|
|
17b94a |
}
|
|
|
17b94a |
UNLOCK(&priv->lock);
|
|
|
17b94a |
|
|
|
17b94a |
+out:
|
|
|
17b94a |
return 0;
|
|
|
17b94a |
}
|
|
|
17b94a |
|
|
|
17b94a |
@@ -1456,6 +1468,9 @@ posix_janitor_task(void *data)
|
|
|
17b94a |
old_this = THIS;
|
|
|
17b94a |
THIS = this;
|
|
|
17b94a |
|
|
|
17b94a |
+ if (!priv)
|
|
|
17b94a |
+ goto out;
|
|
|
17b94a |
+
|
|
|
17b94a |
time(&now;;
|
|
|
17b94a |
if ((now - priv->last_landfill_check) > priv->janitor_sleep_duration) {
|
|
|
17b94a |
if (priv->disable_landfill_purge) {
|
|
|
17b94a |
@@ -1475,6 +1490,7 @@ posix_janitor_task(void *data)
|
|
|
17b94a |
|
|
|
17b94a |
THIS = old_this;
|
|
|
17b94a |
|
|
|
17b94a |
+out:
|
|
|
17b94a |
return 0;
|
|
|
17b94a |
}
|
|
|
17b94a |
|
|
|
17b94a |
diff --git a/xlators/storage/posix/src/posix.h b/xlators/storage/posix/src/posix.h
|
|
|
17b94a |
index b0935a7..64288a7 100644
|
|
|
17b94a |
--- a/xlators/storage/posix/src/posix.h
|
|
|
17b94a |
+++ b/xlators/storage/posix/src/posix.h
|
|
|
17b94a |
@@ -203,6 +203,8 @@ struct posix_private {
|
|
|
17b94a |
struct list_head fsyncs;
|
|
|
17b94a |
pthread_mutex_t fsync_mutex;
|
|
|
17b94a |
pthread_cond_t fsync_cond;
|
|
|
17b94a |
+ pthread_mutex_t janitor_mutex;
|
|
|
17b94a |
+ pthread_cond_t janitor_cond;
|
|
|
17b94a |
int fsync_queue_count;
|
|
|
17b94a |
|
|
|
17b94a |
enum {
|
|
|
17b94a |
@@ -257,6 +259,7 @@ struct posix_private {
|
|
|
17b94a |
|
|
|
17b94a |
gf_boolean_t fips_mode_rchecksum;
|
|
|
17b94a |
gf_boolean_t ctime;
|
|
|
17b94a |
+ gf_boolean_t janitor_task_stop;
|
|
|
17b94a |
};
|
|
|
17b94a |
|
|
|
17b94a |
typedef struct {
|
|
|
17b94a |
--
|
|
|
17b94a |
1.8.3.1
|
|
|
17b94a |
|