From c0860a9b9736b2d5bbe74f96039e1a2ccb90b2b0 Mon Sep 17 00:00:00 2001
From: moagrawa <moagrawa@redhat.com>
Date: Fri, 2 Jun 2017 16:44:27 +0530
Subject: [PATCH 483/486] glusterfs: Not able to mount running volume after enable
brick mux and stopped any volume
Problem: After enabling brick mux, if any volume goes down and a mount is then
attempted on a running volume, the mount command hangs.
Solution: With brick mux enabled, the server shares one data structure,
server_conf, across all associated subvolumes. When any subvolume goes
down in an ungraceful manner (e.g. its brick directory is removed), the
posix xlator sends a GF_EVENT_CHILD_DOWN event to its parent xlators and
server notify sets child_up to false in server_conf. When a client then
tries to communicate with the server through a mount, conf->child_up is
checked and, because it is FALSE, the message "translator are not yet
ready" is thrown.
This patch updates the server_conf structure to save the child_up status
per xlator. Another important correction in this patch is cleaning up
the threads of the server-side xlators after the volume is stopped.
> BUG: 1453977
> Change-Id: Ic54da3f01881b7c9429ce92cc569236eb1d43e0d
> Signed-off-by: Mohit Agrawal <moagrawa@redhat.com>
> Reviewed-on: https://review.gluster.org/17356
> Smoke: Gluster Build System <jenkins@build.gluster.org>
> NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
> Reviewed-by: Raghavendra Talur <rtalur@redhat.com>
> CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
> Reviewed-by: Jeff Darcy <jeff@pl.atyp.us>
> Signed-off-by: Mohit Agrawal <moagrawa@redhat.com>
> (cherry picked from commit dba55ae364a2772904bb68a6bd0ea87289ee1470)
BUG: 1451598
Signed-off-by: Mohit Agrawal <moagrawa@redhat.com>
Change-Id: I174110264de74bb76ffdb635da797cf55fe65ee5
Signed-off-by: moagrawa <moagrawa@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/108021
Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
---
glusterfsd/src/glusterfsd-mgmt.c | 113 +++++++++++----------
libglusterfs/src/defaults-tmpl.c | 11 ++
rpc/xdr/src/glusterfs-fops.x | 4 +-
.../bug-1444596_brick_mux_posix_hlth_chk_status.t | 3 +
xlators/features/bit-rot/src/stub/bit-rot-stub.c | 31 ++++++
xlators/features/changelog/src/changelog.c | 15 +++
.../changetimerecorder/src/changetimerecorder.c | 26 +++++
xlators/features/index/src/index.c | 22 +++-
xlators/features/index/src/index.h | 1 +
xlators/mgmt/glusterd/src/glusterd.c | 2 +-
xlators/performance/io-threads/src/io-threads.c | 3 +-
xlators/protocol/server/src/Makefile.am | 3 +-
xlators/protocol/server/src/server-handshake.c | 24 +++--
xlators/protocol/server/src/server-mem-types.h | 1 +
xlators/protocol/server/src/server-messages.h | 10 +-
xlators/protocol/server/src/server.c | 95 +++++++++++++++--
xlators/protocol/server/src/server.h | 10 +-
xlators/storage/posix/src/posix-helpers.c | 37 ++++---
xlators/storage/posix/src/posix.c | 20 ++++
19 files changed, 333 insertions(+), 98 deletions(-)
diff --git a/glusterfsd/src/glusterfsd-mgmt.c b/glusterfsd/src/glusterfsd-mgmt.c
index 6256030..365706e 100644
--- a/glusterfsd/src/glusterfsd-mgmt.c
+++ b/glusterfsd/src/glusterfsd-mgmt.c
@@ -200,10 +200,11 @@ glusterfs_handle_terminate (rpcsvc_request_t *req)
{
gd1_mgmt_brick_op_req xlator_req = {0,};
ssize_t ret;
- xlator_t *top = NULL;
- xlator_t *victim = NULL;
- glusterfs_ctx_t *ctx = NULL;
- xlator_list_t **trav_p;
+ glusterfs_ctx_t *ctx = NULL;
+ xlator_t *top = NULL;
+ xlator_t *victim = NULL;
+ xlator_list_t **trav_p = NULL;
+ gf_boolean_t lockflag = _gf_false;
ret = xdr_to_generic (req->msg[0], &xlator_req,
(xdrproc_t)xdr_gd1_mgmt_brick_op_req);
@@ -216,57 +217,54 @@ glusterfs_handle_terminate (rpcsvc_request_t *req)
LOCK (&ctx->volfile_lock);
{
/* Find the xlator_list_t that points to our victim. */
- top = glusterfsd_ctx->active->first;
- for (trav_p = &top->children; *trav_p;
- trav_p = &(*trav_p)->next) {
- victim = (*trav_p)->xlator;
- if (strcmp (victim->name, xlator_req.name) == 0) {
- break;
+ if (glusterfsd_ctx->active) {
+ top = glusterfsd_ctx->active->first;
+ for (trav_p = &top->children; *trav_p;
+ trav_p = &(*trav_p)->next) {
+ victim = (*trav_p)->xlator;
+ if (strcmp (victim->name, xlator_req.name) == 0) {
+ break;
+ }
}
}
-
- if (!*trav_p) {
- gf_log (THIS->name, GF_LOG_ERROR,
- "can't terminate %s - not found",
- xlator_req.name);
- /*
- * Used to be -ENOENT. However, the caller asked us to
- * make sure it's down and if it's already down that's
- * good enough.
- */
- glusterfs_terminate_response_send (req, 0);
- goto err;
- }
-
+ }
+ if (!*trav_p) {
+ gf_log (THIS->name, GF_LOG_ERROR,
+ "can't terminate %s - not found",
+ xlator_req.name);
+ /*
+ * Used to be -ENOENT. However, the caller asked us to
+ * make sure it's down and if it's already down that's
+ * good enough.
+ */
glusterfs_terminate_response_send (req, 0);
- if ((trav_p == &top->children) && !(*trav_p)->next) {
- gf_log (THIS->name, GF_LOG_INFO,
- "terminating after loss of last child %s",
- xlator_req.name);
- glusterfs_mgmt_pmap_signout (glusterfsd_ctx,
- xlator_req.name);
- kill (getpid(), SIGTERM);
- } else {
- /*
- * This is terribly unsafe without quiescing or shutting
- * things down properly but it gets us to the point
- * where we can test other stuff.
- *
- * TBD: finish implementing this "detach" code properly
- */
- gf_log (THIS->name, GF_LOG_INFO, "detaching not-only"
- " child %s", xlator_req.name);
- top->notify (top, GF_EVENT_TRANSPORT_CLEANUP, victim);
- glusterfs_mgmt_pmap_signout (glusterfsd_ctx,
- xlator_req.name);
-
- *trav_p = (*trav_p)->next;
- glusterfs_autoscale_threads (THIS->ctx, -1);
- }
+ goto err;
+ }
+ glusterfs_terminate_response_send (req, 0);
+ if ((trav_p == &top->children) && !(*trav_p)->next) {
+ gf_log (THIS->name, GF_LOG_INFO,
+ "terminating after loss of last child %s",
+ xlator_req.name);
+ glusterfs_mgmt_pmap_signout (glusterfsd_ctx, xlator_req.name);
+ kill (getpid(), SIGTERM);
+ } else {
+ /*
+ * This is terribly unsafe without quiescing or shutting
+ * things down properly but it gets us to the point
+ * where we can test other stuff.
+ *
+ * TBD: finish implementing this "detach" code properly
+ */
+ UNLOCK (&ctx->volfile_lock);
+ lockflag = _gf_true;
+ gf_log (THIS->name, GF_LOG_INFO, "detaching not-only"
+ " child %s", xlator_req.name);
+ top->notify (top, GF_EVENT_CLEANUP, victim);
}
err:
- UNLOCK (&ctx->volfile_lock);
+ if (!lockflag)
+ UNLOCK (&ctx->volfile_lock);
free (xlator_req.name);
xlator_req.name = NULL;
return 0;
@@ -840,6 +838,7 @@ glusterfs_handle_attach (rpcsvc_request_t *req)
int32_t ret = -1;
gd1_mgmt_brick_op_req xlator_req = {0,};
xlator_t *this = NULL;
+ xlator_t *nextchild = NULL;
glusterfs_graph_t *newgraph = NULL;
glusterfs_ctx_t *ctx = NULL;
@@ -864,15 +863,19 @@ glusterfs_handle_attach (rpcsvc_request_t *req)
gf_log (this->name, GF_LOG_INFO,
"got attach for %s", xlator_req.name);
ret = glusterfs_graph_attach (this->ctx->active,
- xlator_req.name,
- &newgraph);
- if (ret == 0) {
- ret = glusterfs_graph_parent_up (newgraph);
+ xlator_req.name, &newgraph);
+ if (!ret && (newgraph && newgraph->first)) {
+ nextchild = newgraph->first;
+ ret = xlator_notify (nextchild,
+ GF_EVENT_PARENT_UP,
+ nextchild);
if (ret) {
- gf_msg (this->name, GF_LOG_ERROR, 0,
+ gf_msg (this->name, GF_LOG_ERROR,
+ 0,
LG_MSG_EVENT_NOTIFY_FAILED,
"Parent up notification "
- "failed");
+ "failed for %s ",
+ nextchild->name);
goto out;
}
glusterfs_autoscale_threads (this->ctx, 1);
diff --git a/libglusterfs/src/defaults-tmpl.c b/libglusterfs/src/defaults-tmpl.c
index 5b7578b..7454bc8 100644
--- a/libglusterfs/src/defaults-tmpl.c
+++ b/libglusterfs/src/defaults-tmpl.c
@@ -170,6 +170,17 @@ default_notify (xlator_t *this, int32_t event, void *data, ...)
}
}
break;
+ case GF_EVENT_CLEANUP:
+ {
+ xlator_list_t *list = this->children;
+
+ while (list) {
+ xlator_notify (list->xlator, event, this);
+ list = list->next;
+ }
+ }
+ break;
+
default:
{
xlator_list_t *parent = this->parents;
diff --git a/rpc/xdr/src/glusterfs-fops.x b/rpc/xdr/src/glusterfs-fops.x
index c8f00f0..2f8f4ca 100644
--- a/rpc/xdr/src/glusterfs-fops.x
+++ b/rpc/xdr/src/glusterfs-fops.x
@@ -63,7 +63,7 @@ enum glusterfs_fop_t {
};
/* Note: Removed event GF_EVENT_CHILD_MODIFIED=8, hence
- *to preserve backward compatibiliy, GF_EVENT_TRANSPORT_CLEANUP = 9
+ *to preserve backward compatibility, GF_EVENT_CLEANUP = 9
*/
enum glusterfs_event_t {
GF_EVENT_PARENT_UP = 1,
@@ -73,7 +73,7 @@ enum glusterfs_event_t {
GF_EVENT_CHILD_UP,
GF_EVENT_CHILD_DOWN,
GF_EVENT_CHILD_CONNECTING,
- GF_EVENT_TRANSPORT_CLEANUP = 9,
+ GF_EVENT_CLEANUP = 9,
GF_EVENT_TRANSPORT_CONNECTED,
GF_EVENT_VOLFILE_MODIFIED,
GF_EVENT_GRAPH_NEW,
diff --git a/tests/bugs/glusterd/bug-1444596_brick_mux_posix_hlth_chk_status.t b/tests/bugs/glusterd/bug-1444596_brick_mux_posix_hlth_chk_status.t
index 39ab2dd..e082ba1 100644
--- a/tests/bugs/glusterd/bug-1444596_brick_mux_posix_hlth_chk_status.t
+++ b/tests/bugs/glusterd/bug-1444596_brick_mux_posix_hlth_chk_status.t
@@ -34,6 +34,9 @@ EXPECT_WITHIN $PROCESS_UP_TIMEOUT 2 count_up_bricks $V1
EXPECT 1 count_brick_processes
+TEST glusterfs -s $H0 --volfile-id $V1 $M0
+TEST touch $M0/file{1..10}
+
pkill glusterd
TEST glusterd -LDEBUG
sleep 5
diff --git a/xlators/features/bit-rot/src/stub/bit-rot-stub.c b/xlators/features/bit-rot/src/stub/bit-rot-stub.c
index 2f2a3d5..ed8251b 100644
--- a/xlators/features/bit-rot/src/stub/bit-rot-stub.c
+++ b/xlators/features/bit-rot/src/stub/bit-rot-stub.c
@@ -10,6 +10,7 @@
#include <ctype.h>
#include <sys/uio.h>
+#include <signal.h>
#include "glusterfs.h"
#include "xlator.h"
@@ -214,6 +215,36 @@ out:
return ret;
}
+
+int
+notify (xlator_t *this, int event, void *data, ...)
+{
+ br_stub_private_t *priv = NULL;
+
+ if (!this)
+ return 0;
+
+ priv = this->private;
+ if (!priv)
+ return 0;
+
+ switch (event) {
+ case GF_EVENT_CLEANUP:
+ if (priv->signth) {
+ (void) gf_thread_cleanup_xint (priv->signth);
+ priv->signth = 0;
+ }
+ if (priv->container.thread) {
+ (void) gf_thread_cleanup_xint (priv->container.thread);
+ priv->container.thread = 0;
+ }
+ break;
+ }
+ default_notify (this, event, data);
+ return 0;
+}
+
+
void
fini (xlator_t *this)
{
diff --git a/xlators/features/changelog/src/changelog.c b/xlators/features/changelog/src/changelog.c
index e74da78..8817359 100644
--- a/xlators/features/changelog/src/changelog.c
+++ b/xlators/features/changelog/src/changelog.c
@@ -2104,6 +2104,7 @@ notify (xlator_t *this, int event, void *data, ...)
int ret = 0;
int ret1 = 0;
struct list_head queue = {0, };
+ int i = 0;
INIT_LIST_HEAD (&queue);
@@ -2111,6 +2112,20 @@ notify (xlator_t *this, int event, void *data, ...)
if (!priv)
goto out;
+ if (event == GF_EVENT_CLEANUP) {
+ if (priv->connector) {
+ (void) gf_thread_cleanup_xint (priv->connector);
+ priv->connector = 0;
+ }
+
+ for (; i < NR_DISPATCHERS; i++) {
+ if (priv->ev_dispatcher[i]) {
+ (void) gf_thread_cleanup_xint (priv->ev_dispatcher[i]);
+ priv->ev_dispatcher[i] = 0;
+ }
+ }
+ }
+
if (event == GF_EVENT_TRANSLATOR_OP) {
dict = data;
diff --git a/xlators/features/changetimerecorder/src/changetimerecorder.c b/xlators/features/changetimerecorder/src/changetimerecorder.c
index dba4265..ac2406f 100644
--- a/xlators/features/changetimerecorder/src/changetimerecorder.c
+++ b/xlators/features/changetimerecorder/src/changetimerecorder.c
@@ -2162,6 +2162,32 @@ out:
return 0;
}
+int
+notify (xlator_t *this, int event, void *data, ...)
+{
+
+ gf_ctr_private_t *priv = NULL;
+ int ret = 0;
+
+ priv = this->private;
+
+ if (!priv)
+ goto out;
+
+ if (event == GF_EVENT_CLEANUP) {
+ if (fini_db (priv->_db_conn)) {
+ gf_msg (this->name, GF_LOG_WARNING, 0,
+ CTR_MSG_CLOSE_DB_CONN_FAILED, "Failed closing "
+ "db connection");
+ }
+ } else {
+ ret = default_notify (this, event, data);
+ }
+out:
+ return ret;
+
+}
+
int32_t
mem_acct_init (xlator_t *this)
{
diff --git a/xlators/features/index/src/index.c b/xlators/features/index/src/index.c
index f68dd55..ef5a60c 100644
--- a/xlators/features/index/src/index.c
+++ b/xlators/features/index/src/index.c
@@ -2270,7 +2270,6 @@ init (xlator_t *this)
int ret = -1;
int64_t count = -1;
index_priv_t *priv = NULL;
- pthread_t thread;
pthread_attr_t w_attr;
gf_boolean_t mutex_inited = _gf_false;
gf_boolean_t cond_inited = _gf_false;
@@ -2380,7 +2379,7 @@ init (xlator_t *this)
count = index_fetch_link_count (this, XATTROP);
index_set_link_count (priv, count, XATTROP);
- ret = gf_thread_create (&thread, &w_attr, index_worker, this);
+ ret = gf_thread_create (&priv->thread, &w_attr, index_worker, this);
if (ret) {
gf_log (this->name, GF_LOG_WARNING, "Failed to create "
"worker thread, aborting");
@@ -2490,6 +2489,25 @@ int
notify (xlator_t *this, int event, void *data, ...)
{
int ret = 0;
+ index_priv_t *priv = NULL;
+
+ if (!this)
+ return 0;
+
+ priv = this->private;
+ if (!priv)
+ return 0;
+
+ switch (event) {
+ case GF_EVENT_CLEANUP:
+ if (priv->thread) {
+ (void) gf_thread_cleanup_xint (priv->thread);
+ priv->thread = 0;
+ }
+ break;
+ }
+
+
ret = default_notify (this, event, data);
return ret;
}
diff --git a/xlators/features/index/src/index.h b/xlators/features/index/src/index.h
index 5fb5a65..f622cec 100644
--- a/xlators/features/index/src/index.h
+++ b/xlators/features/index/src/index.h
@@ -60,6 +60,7 @@ typedef struct index_priv {
dict_t *pending_watchlist;
dict_t *complete_watchlist;
int64_t pending_count;
+ pthread_t thread;
} index_priv_t;
typedef struct index_local {
diff --git a/xlators/mgmt/glusterd/src/glusterd.c b/xlators/mgmt/glusterd/src/glusterd.c
index cba1e06..f718d36 100644
--- a/xlators/mgmt/glusterd/src/glusterd.c
+++ b/xlators/mgmt/glusterd/src/glusterd.c
@@ -1973,7 +1973,7 @@ notify (xlator_t *this, int32_t event, void *data, ...)
case GF_EVENT_POLLERR:
break;
- case GF_EVENT_TRANSPORT_CLEANUP:
+ case GF_EVENT_CLEANUP:
break;
default:
diff --git a/xlators/performance/io-threads/src/io-threads.c b/xlators/performance/io-threads/src/io-threads.c
index 72a8208..5e81265 100644
--- a/xlators/performance/io-threads/src/io-threads.c
+++ b/xlators/performance/io-threads/src/io-threads.c
@@ -1046,7 +1046,8 @@ notify (xlator_t *this, int32_t event, void *data, ...)
{
iot_conf_t *conf = this->private;
- if (GF_EVENT_PARENT_DOWN == event)
+ if ((GF_EVENT_PARENT_DOWN == event) ||
+ (GF_EVENT_CLEANUP == event))
iot_exit_threads (conf);
default_notify (this, event, data);
diff --git a/xlators/protocol/server/src/Makefile.am b/xlators/protocol/server/src/Makefile.am
index bb46fda..6981ffa 100644
--- a/xlators/protocol/server/src/Makefile.am
+++ b/xlators/protocol/server/src/Makefile.am
@@ -21,7 +21,8 @@ AM_CPPFLAGS = $(GF_CPPFLAGS) \
-DLIBDIR=\"$(libdir)/glusterfs/$(PACKAGE_VERSION)/auth\" \
-I$(top_srcdir)/xlators/protocol/lib/src \
-I$(top_srcdir)/rpc/rpc-lib/src \
- -I$(top_srcdir)/rpc/xdr/src
+ -I$(top_srcdir)/rpc/xdr/src \
+ -I$(top_srcdir)/glusterfsd/src
AM_CFLAGS = -Wall $(GF_CFLAGS) \
-DDATADIR=\"$(localstatedir)\"
diff --git a/xlators/protocol/server/src/server-handshake.c b/xlators/protocol/server/src/server-handshake.c
index f00804a..f8f8f99 100644
--- a/xlators/protocol/server/src/server-handshake.c
+++ b/xlators/protocol/server/src/server-handshake.c
@@ -413,6 +413,7 @@ server_setvolume (rpcsvc_request_t *req)
int32_t fop_version = 0;
int32_t mgmt_version = 0;
glusterfs_ctx_t *ctx = NULL;
+ struct _child_status *tmp = NULL;
params = dict_new ();
reply = dict_new ();
@@ -512,13 +513,24 @@ server_setvolume (rpcsvc_request_t *req)
"initialised yet. Try again later");
goto fail;
}
-
- ret = dict_set_int32 (reply, "child_up", conf->child_up);
- if (ret < 0)
+ list_for_each_entry (tmp, &conf->child_status->status_list,
+ status_list) {
+ if (strcmp (tmp->name, name) == 0)
+ break;
+ }
+ if (!tmp->name) {
gf_msg (this->name, GF_LOG_ERROR, 0,
- PS_MSG_DICT_GET_FAILED, "Failed to set 'child_up' "
- "in the reply dict");
-
+ PS_MSG_CHILD_STATUS_FAILED,
+ "No xlator %s is found in "
+ "child status list", name);
+ } else {
+ ret = dict_set_int32 (reply, "child_up", tmp->child_up);
+ if (ret < 0)
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ PS_MSG_DICT_GET_FAILED,
+ "Failed to set 'child_up' for xlator %s "
+ "in the reply dict", tmp->name);
+ }
ret = dict_get_str (params, "process-uuid", &client_uid);
if (ret < 0) {
ret = dict_set_str (reply, "ERROR",
diff --git a/xlators/protocol/server/src/server-mem-types.h b/xlators/protocol/server/src/server-mem-types.h
index 9165249..76a78ac 100644
--- a/xlators/protocol/server/src/server-mem-types.h
+++ b/xlators/protocol/server/src/server-mem-types.h
@@ -28,6 +28,7 @@ enum gf_server_mem_types_ {
gf_server_mt_setvolume_rsp_t,
gf_server_mt_lock_mig_t,
gf_server_mt_compound_rsp_t,
+ gf_server_mt_child_status,
gf_server_mt_end,
};
#endif /* __SERVER_MEM_TYPES_H__ */
diff --git a/xlators/protocol/server/src/server-messages.h b/xlators/protocol/server/src/server-messages.h
index b8245af..14729ad 100644
--- a/xlators/protocol/server/src/server-messages.h
+++ b/xlators/protocol/server/src/server-messages.h
@@ -40,7 +40,7 @@
*/
#define GLFS_PS_BASE GLFS_MSGID_COMP_PS
-#define GLFS_NUM_MESSAGES 91
+#define GLFS_NUM_MESSAGES 92
#define GLFS_MSGID_END (GLFS_PS_BASE + GLFS_NUM_MESSAGES + 1)
/* Messages with message IDs */
#define glfs_msg_start_x GLFS_PS_BASE, "Invalid: Start of messages"
@@ -857,6 +857,14 @@
*/
#define PS_MSG_CLIENT_OPVERSION_GET_FAILED (GLFS_PS_BASE + 91)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define PS_MSG_CHILD_STATUS_FAILED (GLFS_PS_BASE + 92)
/*------------*/
#define glfs_msg_end_x GLFS_MSGID_END, "Invalid: End of messages"
diff --git a/xlators/protocol/server/src/server.c b/xlators/protocol/server/src/server.c
index 87a4252..6acf256 100644
--- a/xlators/protocol/server/src/server.c
+++ b/xlators/protocol/server/src/server.c
@@ -23,6 +23,7 @@
#include "event.h"
#include "events.h"
#include "server-messages.h"
+#include "glusterfsd.h"
rpcsvc_cbk_program_t server_cbk_prog = {
.progname = "Gluster Callback",
@@ -1072,7 +1073,9 @@ init (xlator_t *this)
if (ret)
conf->conf_dir = CONFDIR;
- conf->child_up = _gf_false;
+ conf->child_status = GF_CALLOC (1, sizeof (struct _child_status),
+ gf_server_mt_child_status);
+ INIT_LIST_HEAD (&conf->child_status->status_list);
/*ret = dict_get_str (this->options, "statedump-path", &statedump_path);
if (!ret) {
@@ -1396,10 +1399,15 @@ server_process_child_event (xlator_t *this, int32_t event, void *data,
pthread_mutex_lock (&conf->mutex);
{
list_for_each_entry (xprt, &conf->xprt_list, list) {
- rpcsvc_callback_submit (conf->rpc, xprt,
- &server_cbk_prog,
- cbk_procnum,
- NULL, 0, NULL);
+ if (!xprt->xl_private) {
+ continue;
+ }
+ if (xprt->xl_private->bound_xl == data) {
+ rpcsvc_callback_submit (conf->rpc, xprt,
+ &server_cbk_prog,
+ cbk_procnum,
+ NULL, 0, NULL);
+ }
}
}
pthread_mutex_unlock (&conf->mutex);
@@ -1420,6 +1428,13 @@ notify (xlator_t *this, int32_t event, void *data, ...)
va_list ap;
rpc_transport_t *xprt = NULL;
rpc_transport_t *xp_next = NULL;
+ xlator_t *victim = NULL;
+ xlator_t *top = NULL;
+ xlator_t *travxl = NULL;
+ xlator_list_t **trav_p = NULL;
+ struct _child_status *tmp = NULL;
+ gf_boolean_t victim_found = _gf_false;
+ glusterfs_ctx_t *ctx = NULL;
GF_VALIDATE_OR_GOTO (THIS->name, this, out);
conf = this->private;
@@ -1429,6 +1444,8 @@ notify (xlator_t *this, int32_t event, void *data, ...)
va_start (ap, data);
output = va_arg (ap, dict_t*);
va_end (ap);
+ victim = data;
+ ctx = THIS->ctx;
switch (event) {
case GF_EVENT_UPCALL:
@@ -1457,7 +1474,24 @@ notify (xlator_t *this, int32_t event, void *data, ...)
case GF_EVENT_CHILD_UP:
{
- conf->child_up = _gf_true;
+ list_for_each_entry (tmp, &conf->child_status->status_list,
+ status_list) {
+ if (tmp->name == NULL)
+ break;
+ if (strcmp (tmp->name, victim->name) == 0)
+ break;
+ }
+ if (tmp->name) {
+ tmp->child_up = _gf_true;
+ } else {
+ tmp = GF_CALLOC (1, sizeof (struct _child_status),
+ gf_server_mt_child_status);
+ INIT_LIST_HEAD (&tmp->status_list);
+ tmp->name = gf_strdup (victim->name);
+ tmp->child_up = _gf_true;
+ list_add_tail (&tmp->status_list,
+ &conf->child_status->status_list);
+ }
ret = server_process_child_event (this, event, data,
GF_CBK_CHILD_UP);
if (ret) {
@@ -1466,14 +1500,25 @@ notify (xlator_t *this, int32_t event, void *data, ...)
"server_process_child_event failed");
goto out;
}
-
default_notify (this, event, data);
break;
}
case GF_EVENT_CHILD_DOWN:
{
- conf->child_up = _gf_false;
+ list_for_each_entry (tmp, &conf->child_status->status_list,
+ status_list) {
+ if (strcmp (tmp->name, victim->name) == 0) {
+ tmp->child_up = _gf_false;
+ break;
+ }
+ }
+ if (!tmp->name)
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ PS_MSG_CHILD_STATUS_FAILED,
+ "No xlator %s is found in "
+ "child status list", victim->name);
+
ret = server_process_child_event (this, event, data,
GF_CBK_CHILD_DOWN);
if (ret) {
@@ -1482,13 +1527,12 @@ notify (xlator_t *this, int32_t event, void *data, ...)
"server_process_child_event failed");
goto out;
}
-
default_notify (this, event, data);
break;
}
- case GF_EVENT_TRANSPORT_CLEANUP:
+ case GF_EVENT_CLEANUP:
conf = this->private;
pthread_mutex_lock (&conf->mutex);
/*
@@ -1509,8 +1553,37 @@ notify (xlator_t *this, int32_t event, void *data, ...)
rpc_transport_disconnect (xprt, _gf_false);
}
}
+ list_for_each_entry (tmp, &conf->child_status->status_list,
+ status_list) {
+ if (strcmp (tmp->name, victim->name) == 0)
+ break;
+ }
+ if (tmp->name && (strcmp (tmp->name, victim->name) == 0)) {
+ GF_FREE (tmp->name);
+ list_del (&tmp->status_list);
+ }
pthread_mutex_unlock (&conf->mutex);
- /* NB: do *not* propagate anywhere else */
+ if (this->ctx->active) {
+ top = this->ctx->active->first;
+ LOCK (&ctx->volfile_lock);
+ for (trav_p = &top->children; *trav_p;
+ trav_p = &(*trav_p)->next) {
+ travxl = (*trav_p)->xlator;
+ if (travxl &&
+ strcmp (travxl->name, victim->name) == 0) {
+ victim_found = _gf_true;
+ break;
+ }
+ }
+ UNLOCK (&ctx->volfile_lock);
+ if (victim_found)
+ (*trav_p) = (*trav_p)->next;
+ glusterfs_mgmt_pmap_signout (glusterfsd_ctx,
+ victim->name);
+ glusterfs_autoscale_threads (THIS->ctx, -1);
+ default_notify (victim, GF_EVENT_CLEANUP, data);
+
+ }
break;
default:
diff --git a/xlators/protocol/server/src/server.h b/xlators/protocol/server/src/server.h
index 3272106..b419b05 100644
--- a/xlators/protocol/server/src/server.h
+++ b/xlators/protocol/server/src/server.h
@@ -73,6 +73,13 @@ struct _volfile_ctx {
uint32_t checksum;
};
+struct _child_status {
+ struct list_head status_list;
+ char *name;
+ gf_boolean_t child_up;
+
+};
+
struct server_conf {
rpcsvc_t *rpc;
struct rpcsvc_config rpc_conf;
@@ -101,8 +108,7 @@ struct server_conf {
* in case if volume set options
* (say *.allow | *.reject) are
* tweeked */
- gf_boolean_t child_up; /* Set to true, when child is up, and
- * false, when child is down */
+ struct _child_status *child_status;
gf_lock_t itable_lock;
};
diff --git a/xlators/storage/posix/src/posix-helpers.c b/xlators/storage/posix/src/posix-helpers.c
index 0c50967..30f09ac 100644
--- a/xlators/storage/posix/src/posix-helpers.c
+++ b/xlators/storage/posix/src/posix-helpers.c
@@ -1849,9 +1849,11 @@ posix_health_check_thread_proc (void *data)
xlator_list_t **trav_p = NULL;
int count = 0;
gf_boolean_t victim_found = _gf_false;
+ glusterfs_ctx_t *ctx = NULL;
this = data;
priv = this->private;
+ ctx = THIS->ctx;
/* prevent races when the interval is updated */
interval = priv->health_check_interval;
@@ -1860,7 +1862,6 @@ posix_health_check_thread_proc (void *data)
gf_msg_debug (this->name, 0, "health-check thread started, "
"interval = %d seconds", interval);
-
while (1) {
/* aborting sleep() is a request to exit this thread, sleep()
* will normally not return when cancelled */
@@ -1902,10 +1903,12 @@ abort:
*/
if (this->ctx->active) {
top = this->ctx->active->first;
- for (trav_p = &top->children; *trav_p;
+ LOCK (&ctx->volfile_lock);
+ for (trav_p = &top->children; *trav_p;
trav_p = &(*trav_p)->next) {
- count++;
- }
+ count++;
+ }
+ UNLOCK (&ctx->volfile_lock);
}
if (count == 1) {
@@ -1925,26 +1928,28 @@ abort:
kill (getpid(), SIGKILL);
} else {
- for (trav_p = &top->children; *trav_p;
- trav_p = &(*trav_p)->next) {
- victim = (*trav_p)->xlator;
- if (victim &&
- strcmp (victim->name, priv->base_path) == 0) {
- victim_found = _gf_true;
- break;
+ LOCK (&ctx->volfile_lock);
+ for (trav_p = &top->children; *trav_p;
+ trav_p = &(*trav_p)->next) {
+ victim = (*trav_p)->xlator;
+ if (victim &&
+ strcmp (victim->name, priv->base_path) == 0) {
+ victim_found = _gf_true;
+ break;
+ }
}
- }
+ UNLOCK (&ctx->volfile_lock);
if (victim_found) {
- top->notify (top, GF_EVENT_TRANSPORT_CLEANUP, victim);
- glusterfs_mgmt_pmap_signout (glusterfsd_ctx,
- priv->base_path);
- glusterfs_autoscale_threads (THIS->ctx, -1);
+ gf_log (THIS->name, GF_LOG_INFO, "detaching not-only "
+ " child %s", priv->base_path);
+ top->notify (top, GF_EVENT_CLEANUP, victim);
}
}
return NULL;
}
+
void
posix_spawn_health_check_thread (xlator_t *xl)
{
diff --git a/xlators/storage/posix/src/posix.c b/xlators/storage/posix/src/posix.c
index 95eaf0c..ecc45f0 100644
--- a/xlators/storage/posix/src/posix.c
+++ b/xlators/storage/posix/src/posix.c
@@ -6588,6 +6588,9 @@ notify (xlator_t *this,
void *data,
...)
{
+ struct posix_private *priv = NULL;
+
+ priv = this->private;
switch (event)
{
case GF_EVENT_PARENT_UP:
@@ -6596,6 +6599,23 @@ notify (xlator_t *this,
default_notify (this, GF_EVENT_CHILD_UP, data);
}
break;
+ case GF_EVENT_CLEANUP:
+ if (priv->health_check) {
+ pthread_cancel (priv->health_check);
+ priv->health_check = 0;
+ }
+ if (priv->janitor) {
+ (void) gf_thread_cleanup_xint (priv->janitor);
+ priv->janitor = 0;
+ }
+ if (priv->fsyncer) {
+ (void) gf_thread_cleanup_xint (priv->fsyncer);
+ priv->fsyncer = 0;
+ }
+ if (priv->mount_lock)
+ (void) sys_closedir (priv->mount_lock);
+
+ break;
default:
/* */
break;
--
1.8.3.1