From f51150570ee15f4eb504e6d8830c40feb44c0722 Mon Sep 17 00:00:00 2001
From: moagrawa <moagrawa@redhat.com>
Date: Tue, 9 May 2017 10:09:23 +0530
Subject: [PATCH 422/426] glusterd: socketfile & pidfile related fixes for
brick multiplexing feature
Problem: With brick multiplexing enabled, after glusterd is restarted the CLI
does not show the pid of every brick process in all volumes.
Solution: With brick-mux on, all local brick processes communicate through one
UNIX socket, but the current code (glusterd_brick_start) tries to
communicate through a separate UNIX socket for each volume, whose path is
populated from the brick name and volume name. Because the multiplexing design
opens only one UNIX socket, this raises poller errors and the CLI is not able
to fetch the correct status of the brick process.
To resolve the problem, add a new function glusterd_set_socket_filepath_for_mux
that is called by glusterd_brick_start to validate the existence of the socket path.
To avoid continuous EPOLLERR errors in the logs, update the socket_connect code.
Test: To reproduce the issue, follow the steps below:
1) Create two distributed volumes (dist1 and dist2)
2) Set cluster.brick-multiplex to on
3) Kill glusterd
4) Run the command gluster v status
After applying the patch, it shows the correct pid for all volumes.
> BUG: 1444596
> Change-Id: I5d10af69dea0d0ca19511f43870f34295a54a4d2
> Signed-off-by: Mohit Agrawal <moagrawa@redhat.com>
> Reviewed-on: https://review.gluster.org/17101
> Smoke: Gluster Build System <jenkins@build.gluster.org>
> Reviewed-by: Prashanth Pai <ppai@redhat.com>
> NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
> CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
> Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
> (cherry picked from commit 21c7f7baccfaf644805e63682e5a7d2a9864a1e6)
BUG: 1443972
Change-Id: I0edd8f8b31d9e60261376ce4307d4db18d6ceb79
Signed-off-by: Mohit Agrawal <moagrawa@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/105596
Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
---
glusterfsd/src/glusterfsd.c | 1 -
libglusterfs/src/common-utils.c | 28 +++--
libglusterfs/src/common-utils.h | 3 +-
.../bug-1444596_brick_mux_gd_status_restart.t | 68 ++++++++++++
.../bug-1444596_brick_mux_posix_hlth_chk_status.t | 44 ++++++++
tests/bugs/glusterd/bug-913555.t | 4 +
tests/volume.rc | 29 ++++-
xlators/mgmt/glusterd/src/glusterd-brick-ops.c | 5 +-
xlators/mgmt/glusterd/src/glusterd-proc-mgmt.c | 6 +-
xlators/mgmt/glusterd/src/glusterd-rebalance.c | 3 +-
xlators/mgmt/glusterd/src/glusterd-store.c | 4 +
xlators/mgmt/glusterd/src/glusterd-utils.c | 122 ++++++++++++++++-----
xlators/mgmt/glusterd/src/glusterd.c | 3 -
xlators/mgmt/glusterd/src/glusterd.h | 4 +
xlators/storage/posix/src/posix-helpers.c | 33 ++++--
15 files changed, 306 insertions(+), 51 deletions(-)
create mode 100644 tests/bugs/glusterd/bug-1444596_brick_mux_gd_status_restart.t
create mode 100644 tests/bugs/glusterd/bug-1444596_brick_mux_posix_hlth_chk_status.t
diff --git a/glusterfsd/src/glusterfsd.c b/glusterfsd/src/glusterfsd.c
index 7957f38..2724ed7 100644
--- a/glusterfsd/src/glusterfsd.c
+++ b/glusterfsd/src/glusterfsd.c
@@ -1993,7 +1993,6 @@ glusterfs_pidfile_cleanup (glusterfs_ctx_t *ctx)
cmd_args->pid_file);
if (ctx->cmd_args.pid_file) {
- sys_unlink (ctx->cmd_args.pid_file);
ctx->cmd_args.pid_file = NULL;
}
diff --git a/libglusterfs/src/common-utils.c b/libglusterfs/src/common-utils.c
index e335e94..0cc8f91 100644
--- a/libglusterfs/src/common-utils.c
+++ b/libglusterfs/src/common-utils.c
@@ -3635,6 +3635,24 @@ gf_skip_header_section (int fd, int header_len)
return ret;
}
+/* Tell whether a process with the given pid exists right now, judged by
+ * whether its /proc/<pid>/cmdline entry is readable. */
+
+gf_boolean_t
+gf_is_pid_running (int pid)
+{
+        char         proc_entry[32] = {0,};
+        gf_boolean_t alive          = _gf_false;
+
+        snprintf (proc_entry, sizeof (proc_entry), "/proc/%d/cmdline", pid);
+
+        if (sys_access (proc_entry, R_OK) == 0)
+                alive = _gf_true;
+
+        return alive;
+}
+
+
gf_boolean_t
gf_is_service_running (char *pidfile, int *pid)
{
@@ -3663,15 +3681,7 @@ gf_is_service_running (char *pidfile, int *pid)
*pid = -1;
}
- if (!*pid) {
- /*
- * PID 0 means we've started the process, but it hasn't gotten
- * far enough to put in a real PID yet. More details are in
- * glusterd_brick_start.
- */
- running = _gf_true;
- }
-
+ running = gf_is_pid_running (*pid);
out:
if (file)
fclose (file);
diff --git a/libglusterfs/src/common-utils.h b/libglusterfs/src/common-utils.h
index 9109de9..d71cb22 100644
--- a/libglusterfs/src/common-utils.h
+++ b/libglusterfs/src/common-utils.h
@@ -811,7 +811,8 @@ int gf_thread_create (pthread_t *thread, const pthread_attr_t *attr,
void *(*start_routine)(void *), void *arg);
int gf_thread_create_detached (pthread_t *thread,
void *(*start_routine)(void *), void *arg);
-
+gf_boolean_t
+gf_is_pid_running (int pid);
gf_boolean_t
gf_is_service_running (char *pidfile, int *pid);
gf_boolean_t
diff --git a/tests/bugs/glusterd/bug-1444596_brick_mux_gd_status_restart.t b/tests/bugs/glusterd/bug-1444596_brick_mux_gd_status_restart.t
new file mode 100644
index 0000000..950cb5f
--- /dev/null
+++ b/tests/bugs/glusterd/bug-1444596_brick_mux_gd_status_restart.t
@@ -0,0 +1,68 @@
+#!/bin/bash
+# Brick status and brick process count with brick-multiplex on, across a glusterd restart.
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../cluster.rc
+
+# Count the lines matching '<status>1' in the XML status output of volume $1.
+function count_up_bricks {
+ $CLI --xml volume status $1 | grep '<status>1' | wc -l
+}
+
+function count_brick_processes {
+ pgrep glusterfsd | wc -l
+}
+
+cleanup
+TEST glusterd
+TEST $CLI volume create $V0 $H0:$B0/brick{0,1}
+TEST $CLI volume create $V1 $H0:$B0/brick{2,3}
+
+TEST $CLI volume set all cluster.brick-multiplex on
+
+TEST $CLI volume start $V0
+TEST $CLI volume start $V1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 2 count_up_bricks $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 2 count_up_bricks $V1
+EXPECT 1 count_brick_processes
+
+pkill glusterd
+TEST glusterd
+
+# After glusterd is restarted, each volume must still report 2 bricks up.
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 2 count_up_bricks $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 2 count_up_bricks $V1
+
+
+TEST $CLI volume stop $V0
+TEST $CLI volume stop $V1
+
+cleanup
+# Second scenario: restart glusterd while two brick processes are expected.
+TEST glusterd
+TEST $CLI volume create $V0 $H0:$B0/brick{0,1}
+TEST $CLI volume create $V1 $H0:$B0/brick{2,3}
+
+TEST $CLI volume set all cluster.brick-multiplex on
+
+TEST $CLI volume start $V0
+TEST $CLI volume start $V1
+
+EXPECT 1 count_brick_processes
+
+TEST $CLI volume set $V0 performance.cache-size 32MB
+TEST $CLI volume stop $V0
+TEST $CLI volume start $V0
+
+# With an option changed on $V0 and $V0 restarted, 2 brick processes are expected.
+EXPECT 2 count_brick_processes
+
+pkill glusterd
+TEST glusterd
+
+# After glusterd is restarted, brick status must be reported as up, not NA.
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 2 count_up_bricks $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 2 count_up_bricks $V1
+EXPECT 2 count_brick_processes
+
+cleanup
diff --git a/tests/bugs/glusterd/bug-1444596_brick_mux_posix_hlth_chk_status.t b/tests/bugs/glusterd/bug-1444596_brick_mux_posix_hlth_chk_status.t
new file mode 100644
index 0000000..39ab2dd
--- /dev/null
+++ b/tests/bugs/glusterd/bug-1444596_brick_mux_posix_hlth_chk_status.t
@@ -0,0 +1,44 @@
+#!/bin/bash
+# With brick-multiplex on, removing one volume's bricks must not take down another's.
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../cluster.rc
+
+# Count the lines matching '<status>1' in the XML status output of volume $1.
+function count_up_bricks {
+        $CLI --xml volume status $1 | grep '<status>1' | wc -l
+}
+
+function count_brick_processes {
+        pgrep glusterfsd | wc -l
+}
+
+cleanup
+TEST glusterd -LDEBUG
+TEST $CLI volume create $V0 $H0:$B0/brick{0,1}
+TEST $CLI volume create $V1 $H0:$B0/brick{2,3}
+
+TEST $CLI volume set all cluster.brick-multiplex on
+
+TEST $CLI volume start $V0
+TEST $CLI volume start $V1
+
+EXPECT 1 count_brick_processes
+# Drop $V0 and wipe its brick directories; rm needs the local path, not host:path.
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0
+TEST rm -rf $B0/brick{0,1}
+
+# $V1 must keep both bricks up, still served by a single brick process.
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 2 count_up_bricks $V1
+
+EXPECT 1 count_brick_processes
+
+pkill glusterd
+TEST glusterd -LDEBUG
+sleep 5
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 2 count_up_bricks $V1
+
+
+cleanup
+
diff --git a/tests/bugs/glusterd/bug-913555.t b/tests/bugs/glusterd/bug-913555.t
index 0bc839e..8d0bc9d 100755
--- a/tests/bugs/glusterd/bug-913555.t
+++ b/tests/bugs/glusterd/bug-913555.t
@@ -16,6 +16,10 @@ function check_peers {
$CLI_1 peer status | grep 'Peer in Cluster (Connected)' | wc -l
}
+# Count the lines matching '<status>1' in the XML volume status obtained via $CLI_1.
+function online_brick_count {
+ $CLI_1 --xml volume status | grep '<status>1' | wc -l
+}
cleanup;
TEST launch_cluster 3; # start 3-node virtual cluster
diff --git a/tests/volume.rc b/tests/volume.rc
index 336d9df..b3786c3 100644
--- a/tests/volume.rc
+++ b/tests/volume.rc
@@ -21,9 +21,36 @@ function brick_count()
$CLI volume info $vol | egrep "^Brick[0-9]+: " | wc -l;
}
+function check_brick_status() {
+        # Optional $1: daemon name; then count only <status>1 near its entry.
+        local daemon=$1
+        local cmd="gluster --xml volume status"   # local: do not leak into tests
+
+        if [[ -z $daemon ]]; then
+                echo `$cmd | grep '<status>1' | wc -l`
+        else
+                echo `$cmd | grep -A 5 "$daemon" | grep '<status>1' | wc -l`
+        fi
+}
+
function online_brick_count ()
{
- pgrep glusterfsd | wc -l
+ local v1=0
+ local v2=0
+ local v3=0
+ local v4=0
+ local v5=0
+ local tot=0
+
+ #First count total Number of bricks and then subtract daemon status
+ v1=`check_brick_status`
+ v2=`check_brick_status "Self-heal"`
+ v3=`check_brick_status "Quota"`
+ v4=`check_brick_status "Snapshot"`
+ v5=`check_brick_status "Tier"`
+ tot=$((v1-v2-v3-v4-v5))
+ echo $tot
+
}
function brick_up_status {
diff --git a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c
index e12d314..04add60 100644
--- a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c
+++ b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c
@@ -1949,6 +1949,7 @@ glusterd_remove_brick_validate_bricks (gf1_op_commands cmd, int32_t brick_count,
int ret = -1;
char pidfile[PATH_MAX+1] = {0,};
glusterd_conf_t *priv = THIS->private;
+ int pid = -1;
/* Check whether all the nodes of the bricks to be removed are
* up, if not fail the operation */
@@ -2014,12 +2015,14 @@ glusterd_remove_brick_validate_bricks (gf1_op_commands cmd, int32_t brick_count,
}
GLUSTERD_GET_BRICK_PIDFILE (pidfile, volinfo,
brickinfo, priv);
- if (!gf_is_service_running (pidfile, NULL)) {
+ if (!gf_is_service_running (pidfile, &pid)) {
snprintf (msg, sizeof (msg), "Found dead "
"brick %s", brick);
*errstr = gf_strdup (msg);
ret = -1;
goto out;
+ } else {
+ ret = 0;
}
continue;
}
diff --git a/xlators/mgmt/glusterd/src/glusterd-proc-mgmt.c b/xlators/mgmt/glusterd/src/glusterd-proc-mgmt.c
index 9f93462..8eeec40 100644
--- a/xlators/mgmt/glusterd/src/glusterd-proc-mgmt.c
+++ b/xlators/mgmt/glusterd/src/glusterd-proc-mgmt.c
@@ -105,7 +105,7 @@ glusterd_proc_stop (glusterd_proc_t *proc, int sig, int flags)
goto out;
sleep (1);
- if (gf_is_service_running (proc->pidfile, NULL)) {
+ if (gf_is_service_running (proc->pidfile, &pid)) {
ret = kill (pid, SIGKILL);
if (ret) {
gf_msg (this->name, GF_LOG_ERROR, errno,
@@ -131,5 +131,7 @@ glusterd_proc_get_pid (glusterd_proc_t *proc)
int
glusterd_proc_is_running (glusterd_proc_t *proc)
{
- return gf_is_service_running (proc->pidfile, NULL);
+ int pid = -1;
+
+ return gf_is_service_running (proc->pidfile, &pid);
}
diff --git a/xlators/mgmt/glusterd/src/glusterd-rebalance.c b/xlators/mgmt/glusterd/src/glusterd-rebalance.c
index 301ad7c..29b5233 100644
--- a/xlators/mgmt/glusterd/src/glusterd-rebalance.c
+++ b/xlators/mgmt/glusterd/src/glusterd-rebalance.c
@@ -84,6 +84,7 @@ __glusterd_defrag_notify (struct rpc_clnt *rpc, void *mydata,
char pidfile[PATH_MAX];
glusterd_conf_t *priv = NULL;
xlator_t *this = NULL;
+ int pid = -1;
this = THIS;
if (!this)
@@ -134,7 +135,7 @@ __glusterd_defrag_notify (struct rpc_clnt *rpc, void *mydata,
}
UNLOCK (&defrag->lock);
- if (!gf_is_service_running (pidfile, NULL)) {
+ if (!gf_is_service_running (pidfile, &pid)) {
if (volinfo->rebal.defrag_status ==
GF_DEFRAG_STATUS_STARTED) {
volinfo->rebal.defrag_status =
diff --git a/xlators/mgmt/glusterd/src/glusterd-store.c b/xlators/mgmt/glusterd/src/glusterd-store.c
index cf1b3ae..4f9144c 100644
--- a/xlators/mgmt/glusterd/src/glusterd-store.c
+++ b/xlators/mgmt/glusterd/src/glusterd-store.c
@@ -4446,6 +4446,10 @@ glusterd_restore ()
this = THIS;
+ ret = glusterd_options_init (this);
+ if (ret < 0)
+ goto out;
+
ret = glusterd_store_retrieve_volumes (this, NULL);
if (ret)
goto out;
diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c
index 36eb1d1..85b4607 100644
--- a/xlators/mgmt/glusterd/src/glusterd-utils.c
+++ b/xlators/mgmt/glusterd/src/glusterd-utils.c
@@ -1604,7 +1604,7 @@ glusterd_service_stop (const char *service, char *pidfile, int sig,
goto out;
sleep (1);
- if (gf_is_service_running (pidfile, NULL)) {
+ if (gf_is_service_running (pidfile, &pid)) {
ret = kill (pid, SIGKILL);
if (ret) {
gf_msg (this->name, GF_LOG_ERROR, errno,
@@ -1705,6 +1705,8 @@ glusterd_set_brick_socket_filepath (glusterd_volinfo_t *volinfo,
xlator_t *this = NULL;
glusterd_conf_t *priv = NULL;
int expected_file_len = 0;
+ char export_path[PATH_MAX] = {0,};
+ char sock_filepath[PATH_MAX] = {0,};
expected_file_len = strlen (GLUSTERD_SOCK_DIR) + strlen ("/") +
MD5_DIGEST_LENGTH*2 + strlen (".socket") + 1;
@@ -1715,18 +1717,10 @@ glusterd_set_brick_socket_filepath (glusterd_volinfo_t *volinfo,
priv = this->private;
GLUSTERD_GET_VOLUME_DIR (volume_dir, volinfo, priv);
- if (is_brick_mx_enabled ()) {
- snprintf (sockpath, len, "%s/run/daemon-%s.socket",
- volume_dir, brickinfo->hostname);
- } else {
- char export_path[PATH_MAX] = {0,};
- char sock_filepath[PATH_MAX] = {0,};
- GLUSTERD_REMOVE_SLASH_FROM_PATH (brickinfo->path, export_path);
- snprintf (sock_filepath, PATH_MAX, "%s/run/%s-%s",
- volume_dir, brickinfo->hostname, export_path);
-
- glusterd_set_socket_filepath (sock_filepath, sockpath, len);
- }
+ GLUSTERD_REMOVE_SLASH_FROM_PATH (brickinfo->path, export_path);
+ snprintf (sock_filepath, PATH_MAX, "%s/run/%s-%s",
+ volume_dir, brickinfo->hostname, export_path);
+ glusterd_set_socket_filepath (sock_filepath, sockpath, len);
}
/* connection happens only if it is not aleady connected,
@@ -1820,6 +1814,7 @@ glusterd_volume_start_glusterfs (glusterd_volinfo_t *volinfo,
char rdma_brick_path[PATH_MAX] = {0,};
struct rpc_clnt *rpc = NULL;
rpc_clnt_connection_t *conn = NULL;
+ int pid = -1;
GF_ASSERT (volinfo);
GF_ASSERT (brickinfo);
@@ -1842,7 +1837,7 @@ glusterd_volume_start_glusterfs (glusterd_volinfo_t *volinfo,
}
GLUSTERD_GET_BRICK_PIDFILE (pidfile, volinfo, brickinfo, priv);
- if (gf_is_service_running (pidfile, NULL)) {
+ if (gf_is_service_running (pidfile, &pid)) {
goto connect;
}
@@ -5034,8 +5029,6 @@ attach_brick (xlator_t *this,
GLUSTERD_GET_BRICK_PIDFILE (pidfile1, other_vol, other_brick, conf);
GLUSTERD_GET_BRICK_PIDFILE (pidfile2, volinfo, brickinfo, conf);
- (void) sys_unlink (pidfile2);
- (void) sys_link (pidfile1, pidfile2);
if (volinfo->is_snap_volume) {
snprintf (full_id, sizeof(full_id), "/%s/%s/%s.%s.%s",
@@ -5055,6 +5048,10 @@ attach_brick (xlator_t *this,
GLUSTERD_BRICK_ATTACH);
rpc_clnt_unref (rpc);
if (!ret) {
+ /* PID file is copied once brick has attached
+ successfully
+ */
+ glusterd_copy_file (pidfile1, pidfile2);
return 0;
}
}
@@ -5274,6 +5271,80 @@ find_compatible_brick (glusterd_conf_t *conf,
return NULL;
}
+/* Fill sockpath (len bytes) with the UNIX socket path that follows "-S" on
+ * the command line of brick process pid, read from /proc/<pid>/cmdline.
+ * sockpath is left untouched if the process or the option cannot be found. */
+void
+glusterd_get_sock_from_brick_pid (int pid, char *sockpath, size_t len)
+{
+        char      fname[128]                    = {0,};
+        char      buf[1024]                     = {0,};
+        char      cmdline[2 * sizeof (buf) + 1] = {0,};
+        xlator_t *this                          = NULL;
+        int       fd                            = -1;
+        ssize_t   blen                          = 0;
+        ssize_t   i                             = 0;
+        size_t    j                             = 0;
+        char     *ptr                           = NULL;
+        char     *brptr                         = NULL;
+
+        this = THIS;
+        GF_ASSERT (this);
+        snprintf (fname, sizeof (fname), "/proc/%d/cmdline", pid);
+        if (sys_access (fname , R_OK) != 0) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "brick process %d is not running", pid);
+                return;
+        }
+
+        fd = open (fname, O_RDONLY);
+        if (fd == -1) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "open failed %s to open a file %s", strerror (errno),
+                        fname);
+                return;
+        }
+        /* blen stays signed so a failed read cannot turn into a huge length */
+        blen = sys_read (fd, buf, sizeof (buf));
+        sys_close (fd);
+        if (blen <= 0) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "failed to read %s", fname);
+                return;
+        }
+
+        /* convert cmdline to single string; worst case doubles every byte */
+        for (i = 0; i < blen; i++) {
+                if (buf[i] == '\0')
+                        cmdline[j++] = ' ';
+                else if (buf[i] < 32 || buf[i] > 126) /* remove control char */
+                        continue;
+                else if (buf[i] == '"' || buf[i] == '\\') {
+                        cmdline[j++] = '\\';
+                        cmdline[j++] = buf[i];
+                } else {
+                        cmdline[j++] = buf[i];
+                }
+        }
+        cmdline[j] = '\0';
+
+        /* any lookup may miss; bail out instead of dereferencing NULL */
+        ptr = strstr (cmdline, "-S ");
+        if (ptr)
+                ptr = strchr (ptr, '/');
+        if (ptr)
+                brptr = strstr (ptr, "--brick-name");
+        if (!ptr || !brptr || len == 0)
+                return;
+        /* copy the path without blanks, bounded by len, always terminated */
+        for (j = 0; ptr < brptr && j < len - 1; ptr++) {
+                if (*ptr != ' ')
+                        sockpath[j++] = *ptr;
+        }
+        sockpath[j] = '\0';
+}
+
+
int
glusterd_brick_start (glusterd_volinfo_t *volinfo,
glusterd_brickinfo_t *brickinfo,
@@ -5285,7 +5356,6 @@ glusterd_brick_start (glusterd_volinfo_t *volinfo,
glusterd_conf_t *conf = NULL;
int32_t pid = -1;
char pidfile[PATH_MAX] = {0};
- FILE *fp;
char socketpath[PATH_MAX] = {0};
glusterd_volinfo_t *other_vol;
@@ -5339,8 +5409,16 @@ glusterd_brick_start (glusterd_volinfo_t *volinfo,
* same port (on another brick) and re-use that.
* TBD: re-use RPC connection across bricks
*/
- glusterd_set_brick_socket_filepath (volinfo, brickinfo,
- socketpath, sizeof (socketpath));
+ if (is_brick_mx_enabled ())
+ glusterd_get_sock_from_brick_pid (pid, socketpath,
+ sizeof(socketpath));
+ else
+ glusterd_set_brick_socket_filepath (volinfo, brickinfo,
+ socketpath,
+ sizeof (socketpath));
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Using %s as sockfile for brick %s of volume %s ",
+ socketpath, brickinfo->path, volinfo->volname);
(void) glusterd_brick_connect (volinfo, brickinfo,
socketpath);
}
@@ -5379,12 +5457,6 @@ glusterd_brick_start (glusterd_volinfo_t *volinfo,
*
* TBD: pray for GlusterD 2 to be ready soon.
*/
- (void) sys_unlink (pidfile);
- fp = fopen (pidfile, "w+");
- if (fp) {
- (void) fprintf (fp, "0\n");
- (void) fclose (fp);
- }
ret = glusterd_volume_start_glusterfs (volinfo, brickinfo, wait);
if (ret) {
diff --git a/xlators/mgmt/glusterd/src/glusterd.c b/xlators/mgmt/glusterd/src/glusterd.c
index 7d66718..cba1e06 100644
--- a/xlators/mgmt/glusterd/src/glusterd.c
+++ b/xlators/mgmt/glusterd/src/glusterd.c
@@ -1876,9 +1876,6 @@ init (xlator_t *this)
if (cds_list_empty (&conf->peers)) {
glusterd_launch_synctask (glusterd_spawn_daemons, NULL);
}
- ret = glusterd_options_init (this);
- if (ret < 0)
- goto out;
ret = glusterd_handle_upgrade_downgrade (this->options, conf, upgrade,
downgrade);
diff --git a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h
index e3cc6ee..dde06bd 100644
--- a/xlators/mgmt/glusterd/src/glusterd.h
+++ b/xlators/mgmt/glusterd/src/glusterd.h
@@ -1227,4 +1227,8 @@ glusterd_remove_brick_migrate_cbk (glusterd_volinfo_t *volinfo,
int
__glusterd_handle_reset_brick (rpcsvc_request_t *req);
+
+
+int
+glusterd_options_init (xlator_t *this);
#endif
diff --git a/xlators/storage/posix/src/posix-helpers.c b/xlators/storage/posix/src/posix-helpers.c
index dd6791c..276ba3c 100644
--- a/xlators/storage/posix/src/posix-helpers.c
+++ b/xlators/storage/posix/src/posix-helpers.c
@@ -51,6 +51,7 @@
#include "hashfn.h"
#include "glusterfs-acl.h"
#include "events.h"
+#include <sys/types.h>
char *marker_xattrs[] = {"trusted.glusterfs.quota.*",
"trusted.glusterfs.*.xtime",
@@ -1842,6 +1843,9 @@ posix_health_check_thread_proc (void *data)
struct posix_private *priv = NULL;
uint32_t interval = 0;
int ret = -1;
+ xlator_t *top = NULL;
+ xlator_list_t **trav_p = NULL;
+ int count = 0;
this = data;
priv = this->private;
@@ -1890,18 +1894,33 @@ abort:
xlator_notify (this->parents->xlator, GF_EVENT_CHILD_DOWN, this);
- ret = sleep (30);
- if (ret == 0) {
+ /* Below code is use to ensure if brick multiplexing is enabled if
+ count is more than 1 it means brick mux has enabled
+ */
+ if (this->ctx->active) {
+ top = this->ctx->active->first;
+ for (trav_p = &top->children; *trav_p;
+ trav_p = &(*trav_p)->next) {
+ count++;
+ }
+ }
+
+ if (count == 1) {
gf_msg (this->name, GF_LOG_EMERG, 0, P_MSG_HEALTHCHECK_FAILED,
"still alive! -> SIGTERM");
- kill (getpid(), SIGTERM);
- }
+ ret = sleep (30);
- ret = sleep (30);
- if (ret == 0) {
+ /* Need to kill the process only while brick mux has not enabled
+ */
+ if (ret == 0)
+ kill (getpid(), SIGTERM);
+
+ ret = sleep (30);
gf_msg (this->name, GF_LOG_EMERG, 0, P_MSG_HEALTHCHECK_FAILED,
"still alive! -> SIGKILL");
- kill (getpid(), SIGKILL);
+ if (ret == 0)
+ kill (getpid(), SIGKILL);
+
}
return NULL;
--
1.8.3.1