21ab4e
From f51150570ee15f4eb504e6d8830c40feb44c0722 Mon Sep 17 00:00:00 2001
21ab4e
From: moagrawa <moagrawa@redhat.com>
21ab4e
Date: Tue, 9 May 2017 10:09:23 +0530
21ab4e
Subject: [PATCH 422/426] glusterd: socketfile & pidfile related fixes for
21ab4e
 brick multiplexing feature
21ab4e
21ab4e
Problem: While brick-multiplexing is on after restarting glusterd, CLI is
21ab4e
         not showing pid of all brick processes in all volumes.
21ab4e
21ab4e
Solution: While brick-mux is on all local brick process communicated through one
21ab4e
          UNIX socket but as per current code (glusterd_brick_start) it is trying
21ab4e
          to communicate with separate UNIX socket for each volume which is populated
21ab4e
          based on brick-name and vol-name. Because of multiplexing design only one
21ab4e
          UNIX socket is opened so it is throwing poller error and not able to
21ab4e
          fetch correct status of brick process through cli process.
21ab4e
          To resolve the problem write a new function glusterd_set_socket_filepath_for_mux
21ab4e
          that will be called by glusterd_brick_start to validate the existence of socketpath.
21ab4e
          To avoid the continuous EPOLLERR errors in logs update socket_connect code.
21ab4e
21ab4e
Test:     To reproduce the issue followed below steps
21ab4e
          1) Create two distributed volumes(dist1 and dist2)
21ab4e
          2) Set cluster.brick-multiplex is on
21ab4e
          3) kill glusterd
21ab4e
          4) run command gluster v status
21ab4e
          After apply the patch it shows correct pid for all volumes
21ab4e
21ab4e
> BUG: 1444596
21ab4e
> Change-Id: I5d10af69dea0d0ca19511f43870f34295a54a4d2
21ab4e
> Signed-off-by: Mohit Agrawal <moagrawa@redhat.com>
21ab4e
> Reviewed-on: https://review.gluster.org/17101
21ab4e
> Smoke: Gluster Build System <jenkins@build.gluster.org>
21ab4e
> Reviewed-by: Prashanth Pai <ppai@redhat.com>
21ab4e
> NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
21ab4e
> CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
21ab4e
> Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
21ab4e
> (cherry picked from commit 21c7f7baccfaf644805e63682e5a7d2a9864a1e6)
21ab4e
21ab4e
BUG: 1443972
21ab4e
Change-Id: I0edd8f8b31d9e60261376ce4307d4db18d6ceb79
21ab4e
Signed-off-by: Mohit Agrawal <moagrawa@redhat.com>
21ab4e
Reviewed-on: https://code.engineering.redhat.com/gerrit/105596
21ab4e
Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
21ab4e
---
21ab4e
 glusterfsd/src/glusterfsd.c                        |   1 -
21ab4e
 libglusterfs/src/common-utils.c                    |  28 +++--
21ab4e
 libglusterfs/src/common-utils.h                    |   3 +-
21ab4e
 .../bug-1444596_brick_mux_gd_status_restart.t      |  68 ++++++++++++
21ab4e
 .../bug-1444596_brick_mux_posix_hlth_chk_status.t  |  44 ++++++++
21ab4e
 tests/bugs/glusterd/bug-913555.t                   |   4 +
21ab4e
 tests/volume.rc                                    |  29 ++++-
21ab4e
 xlators/mgmt/glusterd/src/glusterd-brick-ops.c     |   5 +-
21ab4e
 xlators/mgmt/glusterd/src/glusterd-proc-mgmt.c     |   6 +-
21ab4e
 xlators/mgmt/glusterd/src/glusterd-rebalance.c     |   3 +-
21ab4e
 xlators/mgmt/glusterd/src/glusterd-store.c         |   4 +
21ab4e
 xlators/mgmt/glusterd/src/glusterd-utils.c         | 122 ++++++++++++++++-----
21ab4e
 xlators/mgmt/glusterd/src/glusterd.c               |   3 -
21ab4e
 xlators/mgmt/glusterd/src/glusterd.h               |   4 +
21ab4e
 xlators/storage/posix/src/posix-helpers.c          |  33 ++++--
21ab4e
 15 files changed, 306 insertions(+), 51 deletions(-)
21ab4e
 create mode 100644 tests/bugs/glusterd/bug-1444596_brick_mux_gd_status_restart.t
21ab4e
 create mode 100644 tests/bugs/glusterd/bug-1444596_brick_mux_posix_hlth_chk_status.t
21ab4e
21ab4e
diff --git a/glusterfsd/src/glusterfsd.c b/glusterfsd/src/glusterfsd.c
21ab4e
index 7957f38..2724ed7 100644
21ab4e
--- a/glusterfsd/src/glusterfsd.c
21ab4e
+++ b/glusterfsd/src/glusterfsd.c
21ab4e
@@ -1993,7 +1993,6 @@ glusterfs_pidfile_cleanup (glusterfs_ctx_t *ctx)
21ab4e
                       cmd_args->pid_file);
21ab4e
 
21ab4e
         if (ctx->cmd_args.pid_file) {
21ab4e
-                sys_unlink (ctx->cmd_args.pid_file);
21ab4e
                 ctx->cmd_args.pid_file = NULL;
21ab4e
         }
21ab4e
 
21ab4e
diff --git a/libglusterfs/src/common-utils.c b/libglusterfs/src/common-utils.c
21ab4e
index e335e94..0cc8f91 100644
21ab4e
--- a/libglusterfs/src/common-utils.c
21ab4e
+++ b/libglusterfs/src/common-utils.c
21ab4e
@@ -3635,6 +3635,24 @@ gf_skip_header_section (int fd, int header_len)
21ab4e
         return ret;
21ab4e
 }
21ab4e
 
21ab4e
+/* Below function is use to check at runtime if pid is running */
21ab4e
+
21ab4e
+gf_boolean_t
21ab4e
+gf_is_pid_running (int pid)
21ab4e
+{
21ab4e
+        char fname[32] = {0,};
21ab4e
+
21ab4e
+        snprintf(fname, sizeof(fname), "/proc/%d/cmdline", pid);
21ab4e
+
21ab4e
+        if (sys_access (fname , R_OK) != 0) {
21ab4e
+                return _gf_false;
21ab4e
+        }
21ab4e
+
21ab4e
+        return _gf_true;
21ab4e
+
21ab4e
+}
21ab4e
+
21ab4e
+
21ab4e
 gf_boolean_t
21ab4e
 gf_is_service_running (char *pidfile, int *pid)
21ab4e
 {
21ab4e
@@ -3663,15 +3681,7 @@ gf_is_service_running (char *pidfile, int *pid)
21ab4e
                 *pid = -1;
21ab4e
         }
21ab4e
 
21ab4e
-        if (!*pid) {
21ab4e
-                /*
21ab4e
-                 * PID 0 means we've started the process, but it hasn't gotten
21ab4e
-                 * far enough to put in a real PID yet.  More details are in
21ab4e
-                 * glusterd_brick_start.
21ab4e
-                 */
21ab4e
-                running = _gf_true;
21ab4e
-        }
21ab4e
-
21ab4e
+        running = gf_is_pid_running (*pid);
21ab4e
 out:
21ab4e
         if (file)
21ab4e
                 fclose (file);
21ab4e
diff --git a/libglusterfs/src/common-utils.h b/libglusterfs/src/common-utils.h
21ab4e
index 9109de9..d71cb22 100644
21ab4e
--- a/libglusterfs/src/common-utils.h
21ab4e
+++ b/libglusterfs/src/common-utils.h
21ab4e
@@ -811,7 +811,8 @@ int gf_thread_create (pthread_t *thread, const pthread_attr_t *attr,
21ab4e
                       void *(*start_routine)(void *), void *arg);
21ab4e
 int gf_thread_create_detached (pthread_t *thread,
21ab4e
                       void *(*start_routine)(void *), void *arg);
21ab4e
-
21ab4e
+gf_boolean_t
21ab4e
+gf_is_pid_running (int pid);
21ab4e
 gf_boolean_t
21ab4e
 gf_is_service_running (char *pidfile, int *pid);
21ab4e
 gf_boolean_t
21ab4e
diff --git a/tests/bugs/glusterd/bug-1444596_brick_mux_gd_status_restart.t b/tests/bugs/glusterd/bug-1444596_brick_mux_gd_status_restart.t
21ab4e
new file mode 100644
21ab4e
index 0000000..950cb5f
21ab4e
--- /dev/null
21ab4e
+++ b/tests/bugs/glusterd/bug-1444596_brick_mux_gd_status_restart.t
21ab4e
@@ -0,0 +1,68 @@
21ab4e
+#!/bin/bash
21ab4e
+
21ab4e
+. $(dirname $0)/../../include.rc
21ab4e
+. $(dirname $0)/../../volume.rc
21ab4e
+. $(dirname $0)/../../cluster.rc
21ab4e
+
21ab4e
+
21ab4e
+function count_up_bricks {
21ab4e
+        $CLI --xml volume status $1 | grep '<status>1' | wc -l
21ab4e
+}
21ab4e
+
21ab4e
+function count_brick_processes {
21ab4e
+        pgrep glusterfsd | wc -l
21ab4e
+}
21ab4e
+
21ab4e
+cleanup
21ab4e
+TEST glusterd
21ab4e
+TEST $CLI volume create $V0 $H0:$B0/brick{0,1}
21ab4e
+TEST $CLI volume create $V1 $H0:$B0/brick{2,3}
21ab4e
+
21ab4e
+TEST $CLI volume set all cluster.brick-multiplex on
21ab4e
+
21ab4e
+TEST $CLI volume start $V0
21ab4e
+TEST $CLI volume start $V1
21ab4e
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 2 count_up_bricks $V0
21ab4e
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 2 count_up_bricks $V1
21ab4e
+EXPECT 1 count_brick_processes
21ab4e
+
21ab4e
+pkill glusterd
21ab4e
+TEST glusterd
21ab4e
+
21ab4e
+#Check brick status after restart glusterd
21ab4e
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 2 count_up_bricks $V0
21ab4e
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 2 count_up_bricks $V1
21ab4e
+
21ab4e
+
21ab4e
+TEST $CLI volume stop $V0
21ab4e
+TEST $CLI volume stop $V1
21ab4e
+
21ab4e
+cleanup
21ab4e
+
21ab4e
+TEST glusterd
21ab4e
+TEST $CLI volume create $V0 $H0:$B0/brick{0,1}
21ab4e
+TEST $CLI volume create $V1 $H0:$B0/brick{2,3}
21ab4e
+
21ab4e
+TEST $CLI volume set all cluster.brick-multiplex on
21ab4e
+
21ab4e
+TEST $CLI volume start $V0
21ab4e
+TEST $CLI volume start $V1
21ab4e
+
21ab4e
+EXPECT 1 count_brick_processes
21ab4e
+
21ab4e
+TEST $CLI volume set $V0 performance.cache-size 32MB
21ab4e
+TEST $CLI volume stop $V0
21ab4e
+TEST $CLI volume start $V0
21ab4e
+
21ab4e
+#Check No. of brick processes after change option
21ab4e
+EXPECT 2 count_brick_processes
21ab4e
+
21ab4e
+pkill glusterd
21ab4e
+TEST glusterd
21ab4e
+
21ab4e
+#Check brick status after restart glusterd should not be NA
21ab4e
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 2 count_up_bricks $V0
21ab4e
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 2 count_up_bricks $V1
21ab4e
+EXPECT 2 count_brick_processes
21ab4e
+
21ab4e
+cleanup
21ab4e
diff --git a/tests/bugs/glusterd/bug-1444596_brick_mux_posix_hlth_chk_status.t b/tests/bugs/glusterd/bug-1444596_brick_mux_posix_hlth_chk_status.t
21ab4e
new file mode 100644
21ab4e
index 0000000..39ab2dd
21ab4e
--- /dev/null
21ab4e
+++ b/tests/bugs/glusterd/bug-1444596_brick_mux_posix_hlth_chk_status.t
21ab4e
@@ -0,0 +1,44 @@
21ab4e
+#!/bin/bash
21ab4e
+
21ab4e
+. $(dirname $0)/../../include.rc
21ab4e
+. $(dirname $0)/../../volume.rc
21ab4e
+. $(dirname $0)/../../cluster.rc
21ab4e
+
21ab4e
+
21ab4e
+function count_up_bricks {
21ab4e
+        $CLI --xml volume status $1 | grep '<status>1' | wc -l
21ab4e
+}
21ab4e
+
21ab4e
+function count_brick_processes {
21ab4e
+        pgrep glusterfsd | wc -l
21ab4e
+}
21ab4e
+
21ab4e
+cleanup
21ab4e
+TEST glusterd -LDEBUG
21ab4e
+TEST $CLI volume create $V0 $H0:$B0/brick{0,1}
21ab4e
+TEST $CLI volume create $V1 $H0:$B0/brick{2,3}
21ab4e
+
21ab4e
+TEST $CLI volume set all cluster.brick-multiplex on
21ab4e
+
21ab4e
+TEST $CLI volume start $V0
21ab4e
+TEST $CLI volume start $V1
21ab4e
+
21ab4e
+EXPECT 1 count_brick_processes
21ab4e
+
21ab4e
+TEST $CLI volume stop $V0
21ab4e
+TEST $CLI volume delete $V0
21ab4e
+TEST rm -rf $H0:$B0/brick{0,1}
21ab4e
+
21ab4e
+#Check No. of brick processes after remove brick from back-end
21ab4e
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 2 count_up_bricks $V1
21ab4e
+
21ab4e
+EXPECT 1 count_brick_processes
21ab4e
+
21ab4e
+pkill glusterd
21ab4e
+TEST glusterd -LDEBUG
21ab4e
+sleep 5
21ab4e
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 2 count_up_bricks $V1
21ab4e
+
21ab4e
+
21ab4e
+cleanup
21ab4e
+
21ab4e
diff --git a/tests/bugs/glusterd/bug-913555.t b/tests/bugs/glusterd/bug-913555.t
21ab4e
index 0bc839e..8d0bc9d 100755
21ab4e
--- a/tests/bugs/glusterd/bug-913555.t
21ab4e
+++ b/tests/bugs/glusterd/bug-913555.t
21ab4e
@@ -16,6 +16,10 @@ function check_peers {
21ab4e
 	$CLI_1 peer status | grep 'Peer in Cluster (Connected)' | wc -l
21ab4e
 }
21ab4e
 
21ab4e
+function online_brick_count {
21ab4e
+       $CLI_1 --xml volume status | grep '<status>1' | wc -l
21ab4e
+}
21ab4e
+
21ab4e
 cleanup;
21ab4e
 
21ab4e
 TEST launch_cluster 3; # start 3-node virtual cluster
21ab4e
diff --git a/tests/volume.rc b/tests/volume.rc
21ab4e
index 336d9df..b3786c3 100644
21ab4e
--- a/tests/volume.rc
21ab4e
+++ b/tests/volume.rc
21ab4e
@@ -21,9 +21,36 @@ function brick_count()
21ab4e
     $CLI volume info $vol | egrep "^Brick[0-9]+: " | wc -l;
21ab4e
 }
21ab4e
 
21ab4e
+function check_brick_status() {
21ab4e
+       cmd="gluster --xml volume status"
21ab4e
+       local daemon=$1
21ab4e
+
21ab4e
+       if [[ -z $daemon ]]
21ab4e
+        then
21ab4e
+                echo `$cmd | grep '<status>1' | wc -l`
21ab4e
+       else
21ab4e
+                echo `$cmd | grep -A 5 ${daemon} | grep '<status>1' | wc -l`
21ab4e
+       fi
21ab4e
+}
21ab4e
+
21ab4e
 function online_brick_count ()
21ab4e
 {
21ab4e
-    pgrep glusterfsd | wc -l
21ab4e
+       local v1=0
21ab4e
+       local v2=0
21ab4e
+       local v3=0
21ab4e
+       local v4=0
21ab4e
+       local v5=0
21ab4e
+       local tot=0
21ab4e
+
21ab4e
+       #First count total Number of bricks and then subtract daemon status
21ab4e
+       v1=`check_brick_status`
21ab4e
+       v2=`check_brick_status "Self-heal"`
21ab4e
+       v3=`check_brick_status "Quota"`
21ab4e
+       v4=`check_brick_status "Snapshot"`
21ab4e
+       v5=`check_brick_status "Tier"`
21ab4e
+       tot=$((v1-v2-v3-v4-v5))
21ab4e
+       echo $tot
21ab4e
+
21ab4e
 }
21ab4e
 
21ab4e
 function brick_up_status {
21ab4e
diff --git a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c
21ab4e
index e12d314..04add60 100644
21ab4e
--- a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c
21ab4e
+++ b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c
21ab4e
@@ -1949,6 +1949,7 @@ glusterd_remove_brick_validate_bricks (gf1_op_commands cmd, int32_t brick_count,
21ab4e
         int                     ret         = -1;
21ab4e
         char                    pidfile[PATH_MAX+1] = {0,};
21ab4e
         glusterd_conf_t        *priv        = THIS->private;
21ab4e
+        int                    pid          = -1;
21ab4e
 
21ab4e
         /* Check whether all the nodes of the bricks to be removed are
21ab4e
         * up, if not fail the operation */
21ab4e
@@ -2014,12 +2015,14 @@ glusterd_remove_brick_validate_bricks (gf1_op_commands cmd, int32_t brick_count,
21ab4e
                         }
21ab4e
                         GLUSTERD_GET_BRICK_PIDFILE (pidfile, volinfo,
21ab4e
                                                     brickinfo, priv);
21ab4e
-                        if (!gf_is_service_running (pidfile, NULL)) {
21ab4e
+                        if (!gf_is_service_running (pidfile, &pid)) {
21ab4e
                                 snprintf (msg, sizeof (msg), "Found dead "
21ab4e
                                           "brick %s", brick);
21ab4e
                                 *errstr = gf_strdup (msg);
21ab4e
                                 ret = -1;
21ab4e
                                 goto out;
21ab4e
+                        } else {
21ab4e
+                                ret = 0;
21ab4e
                         }
21ab4e
                         continue;
21ab4e
                 }
21ab4e
diff --git a/xlators/mgmt/glusterd/src/glusterd-proc-mgmt.c b/xlators/mgmt/glusterd/src/glusterd-proc-mgmt.c
21ab4e
index 9f93462..8eeec40 100644
21ab4e
--- a/xlators/mgmt/glusterd/src/glusterd-proc-mgmt.c
21ab4e
+++ b/xlators/mgmt/glusterd/src/glusterd-proc-mgmt.c
21ab4e
@@ -105,7 +105,7 @@ glusterd_proc_stop (glusterd_proc_t *proc, int sig, int flags)
21ab4e
                 goto out;
21ab4e
 
21ab4e
         sleep (1);
21ab4e
-        if (gf_is_service_running (proc->pidfile, NULL)) {
21ab4e
+        if (gf_is_service_running (proc->pidfile, &pid)) {
21ab4e
                 ret = kill (pid, SIGKILL);
21ab4e
                 if (ret) {
21ab4e
                         gf_msg (this->name, GF_LOG_ERROR, errno,
21ab4e
@@ -131,5 +131,7 @@ glusterd_proc_get_pid (glusterd_proc_t *proc)
21ab4e
 int
21ab4e
 glusterd_proc_is_running (glusterd_proc_t *proc)
21ab4e
 {
21ab4e
-        return gf_is_service_running (proc->pidfile, NULL);
21ab4e
+        int pid = -1;
21ab4e
+
21ab4e
+        return gf_is_service_running (proc->pidfile, &pid);
21ab4e
 }
21ab4e
diff --git a/xlators/mgmt/glusterd/src/glusterd-rebalance.c b/xlators/mgmt/glusterd/src/glusterd-rebalance.c
21ab4e
index 301ad7c..29b5233 100644
21ab4e
--- a/xlators/mgmt/glusterd/src/glusterd-rebalance.c
21ab4e
+++ b/xlators/mgmt/glusterd/src/glusterd-rebalance.c
21ab4e
@@ -84,6 +84,7 @@ __glusterd_defrag_notify (struct rpc_clnt *rpc, void *mydata,
21ab4e
         char                    pidfile[PATH_MAX];
21ab4e
         glusterd_conf_t        *priv    = NULL;
21ab4e
         xlator_t               *this    = NULL;
21ab4e
+        int                    pid      = -1;
21ab4e
 
21ab4e
         this = THIS;
21ab4e
         if (!this)
21ab4e
@@ -134,7 +135,7 @@ __glusterd_defrag_notify (struct rpc_clnt *rpc, void *mydata,
21ab4e
                 }
21ab4e
                 UNLOCK (&defrag->lock);
21ab4e
 
21ab4e
-                if (!gf_is_service_running (pidfile, NULL)) {
21ab4e
+                if (!gf_is_service_running (pidfile, &pid)) {
21ab4e
                         if (volinfo->rebal.defrag_status ==
21ab4e
                                                 GF_DEFRAG_STATUS_STARTED) {
21ab4e
                                 volinfo->rebal.defrag_status =
21ab4e
diff --git a/xlators/mgmt/glusterd/src/glusterd-store.c b/xlators/mgmt/glusterd/src/glusterd-store.c
21ab4e
index cf1b3ae..4f9144c 100644
21ab4e
--- a/xlators/mgmt/glusterd/src/glusterd-store.c
21ab4e
+++ b/xlators/mgmt/glusterd/src/glusterd-store.c
21ab4e
@@ -4446,6 +4446,10 @@ glusterd_restore ()
21ab4e
 
21ab4e
         this = THIS;
21ab4e
 
21ab4e
+	ret = glusterd_options_init (this);
21ab4e
+        if (ret < 0)
21ab4e
+                goto out;
21ab4e
+
21ab4e
         ret = glusterd_store_retrieve_volumes (this, NULL);
21ab4e
         if (ret)
21ab4e
                 goto out;
21ab4e
diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c
21ab4e
index 36eb1d1..85b4607 100644
21ab4e
--- a/xlators/mgmt/glusterd/src/glusterd-utils.c
21ab4e
+++ b/xlators/mgmt/glusterd/src/glusterd-utils.c
21ab4e
@@ -1604,7 +1604,7 @@ glusterd_service_stop (const char *service, char *pidfile, int sig,
21ab4e
                 goto out;
21ab4e
 
21ab4e
         sleep (1);
21ab4e
-        if (gf_is_service_running (pidfile, NULL)) {
21ab4e
+        if (gf_is_service_running (pidfile, &pid)) {
21ab4e
                 ret = kill (pid, SIGKILL);
21ab4e
                 if (ret) {
21ab4e
                         gf_msg (this->name, GF_LOG_ERROR, errno,
21ab4e
@@ -1705,6 +1705,8 @@ glusterd_set_brick_socket_filepath (glusterd_volinfo_t *volinfo,
21ab4e
         xlator_t                *this = NULL;
21ab4e
         glusterd_conf_t         *priv = NULL;
21ab4e
         int                     expected_file_len = 0;
21ab4e
+        char                    export_path[PATH_MAX] = {0,};
21ab4e
+        char                    sock_filepath[PATH_MAX] = {0,};
21ab4e
 
21ab4e
         expected_file_len = strlen (GLUSTERD_SOCK_DIR) + strlen ("/") +
21ab4e
                             MD5_DIGEST_LENGTH*2 + strlen (".socket") + 1;
21ab4e
@@ -1715,18 +1717,10 @@ glusterd_set_brick_socket_filepath (glusterd_volinfo_t *volinfo,
21ab4e
         priv = this->private;
21ab4e
 
21ab4e
         GLUSTERD_GET_VOLUME_DIR (volume_dir, volinfo, priv);
21ab4e
-        if (is_brick_mx_enabled ()) {
21ab4e
-                snprintf (sockpath, len, "%s/run/daemon-%s.socket",
21ab4e
-                          volume_dir, brickinfo->hostname);
21ab4e
-        } else {
21ab4e
-                char                    export_path[PATH_MAX] = {0,};
21ab4e
-                char                    sock_filepath[PATH_MAX] = {0,};
21ab4e
-                GLUSTERD_REMOVE_SLASH_FROM_PATH (brickinfo->path, export_path);
21ab4e
-                snprintf (sock_filepath, PATH_MAX, "%s/run/%s-%s",
21ab4e
-                          volume_dir, brickinfo->hostname, export_path);
21ab4e
-
21ab4e
-                glusterd_set_socket_filepath (sock_filepath, sockpath, len);
21ab4e
-        }
21ab4e
+        GLUSTERD_REMOVE_SLASH_FROM_PATH (brickinfo->path, export_path);
21ab4e
+        snprintf (sock_filepath, PATH_MAX, "%s/run/%s-%s",
21ab4e
+                  volume_dir, brickinfo->hostname, export_path);
21ab4e
+        glusterd_set_socket_filepath (sock_filepath, sockpath, len);
21ab4e
 }
21ab4e
 
21ab4e
 /* connection happens only if it is not aleady connected,
21ab4e
@@ -1820,6 +1814,7 @@ glusterd_volume_start_glusterfs (glusterd_volinfo_t  *volinfo,
21ab4e
         char                    rdma_brick_path[PATH_MAX] = {0,};
21ab4e
         struct rpc_clnt         *rpc = NULL;
21ab4e
         rpc_clnt_connection_t   *conn  = NULL;
21ab4e
+        int                     pid    = -1;
21ab4e
 
21ab4e
         GF_ASSERT (volinfo);
21ab4e
         GF_ASSERT (brickinfo);
21ab4e
@@ -1842,7 +1837,7 @@ glusterd_volume_start_glusterfs (glusterd_volinfo_t  *volinfo,
21ab4e
         }
21ab4e
 
21ab4e
         GLUSTERD_GET_BRICK_PIDFILE (pidfile, volinfo, brickinfo, priv);
21ab4e
-        if (gf_is_service_running (pidfile, NULL)) {
21ab4e
+        if (gf_is_service_running (pidfile, &pid)) {
21ab4e
                 goto connect;
21ab4e
         }
21ab4e
 
21ab4e
@@ -5034,8 +5029,6 @@ attach_brick (xlator_t *this,
21ab4e
 
21ab4e
         GLUSTERD_GET_BRICK_PIDFILE (pidfile1, other_vol, other_brick, conf);
21ab4e
         GLUSTERD_GET_BRICK_PIDFILE (pidfile2, volinfo, brickinfo, conf);
21ab4e
-        (void) sys_unlink (pidfile2);
21ab4e
-        (void) sys_link (pidfile1, pidfile2);
21ab4e
 
21ab4e
         if (volinfo->is_snap_volume) {
21ab4e
                 snprintf (full_id, sizeof(full_id), "/%s/%s/%s.%s.%s",
21ab4e
@@ -5055,6 +5048,10 @@ attach_brick (xlator_t *this,
21ab4e
                                                GLUSTERD_BRICK_ATTACH);
21ab4e
                         rpc_clnt_unref (rpc);
21ab4e
                         if (!ret) {
21ab4e
+                                /* PID file is copied once brick has attached
21ab4e
+                                  successfully
21ab4e
+                                */
21ab4e
+                                glusterd_copy_file (pidfile1, pidfile2);
21ab4e
                                 return 0;
21ab4e
                         }
21ab4e
                 }
21ab4e
@@ -5274,6 +5271,80 @@ find_compatible_brick (glusterd_conf_t *conf,
21ab4e
         return NULL;
21ab4e
 }
21ab4e
 
21ab4e
+/* Below function is use to populate sockpath based on passed pid
21ab4e
+   value as a argument after check the value from proc
21ab4e
+*/
21ab4e
+
21ab4e
+void
21ab4e
+glusterd_get_sock_from_brick_pid (int pid, char *sockpath, size_t len)
21ab4e
+{
21ab4e
+        char fname[128] = {0,};
21ab4e
+        char buf[1024] = {0,};
21ab4e
+        char cmdline[2048] = {0,};
21ab4e
+        xlator_t                *this = NULL;
21ab4e
+        int fd         = -1;
21ab4e
+        int i = 0, j = 0;
21ab4e
+        char   *ptr   = NULL;
21ab4e
+        char   *brptr   = NULL;
21ab4e
+        char tmpsockpath[PATH_MAX] = {0,};
21ab4e
+        size_t blen    = 0;
21ab4e
+
21ab4e
+        this = THIS;
21ab4e
+        GF_ASSERT (this);
21ab4e
+
21ab4e
+        snprintf(fname, sizeof(fname), "/proc/%d/cmdline", pid);
21ab4e
+
21ab4e
+        if (sys_access (fname , R_OK) != 0) {
21ab4e
+                gf_log (this->name, GF_LOG_ERROR,
21ab4e
+                         "brick process %d is not running", pid);
21ab4e
+                return;
21ab4e
+        }
21ab4e
+
21ab4e
+        fd = open(fname, O_RDONLY);
21ab4e
+        if (fd != -1) {
21ab4e
+                blen = (int)sys_read(fd, buf, 1024);
21ab4e
+        } else {
21ab4e
+                gf_log (this->name, GF_LOG_ERROR,
21ab4e
+                         "open failed %s to open a file %s", strerror (errno),
21ab4e
+                                                              fname);
21ab4e
+                return;
21ab4e
+        }
21ab4e
+
21ab4e
+        /* convert cmdline to single string */
21ab4e
+        for (i = 0 , j = 0; i < blen; i++)  {
21ab4e
+                if (buf[i] == '\0')
21ab4e
+                        cmdline[j++] = ' ';
21ab4e
+                else if (buf[i] < 32 || buf[i] > 126) /* remove control char */
21ab4e
+                        continue;
21ab4e
+                else if (buf[i] == '"' || buf[i] == '\\') {
21ab4e
+                        cmdline[j++] = '\\';
21ab4e
+                        cmdline[j++] = buf[i];
21ab4e
+                } else {
21ab4e
+                        cmdline[j++] = buf[i];
21ab4e
+                }
21ab4e
+        }
21ab4e
+        cmdline[j] = '\0';
21ab4e
+        if (fd)
21ab4e
+                sys_close(fd);
21ab4e
+
21ab4e
+        ptr =   strstr(cmdline, "-S ");
21ab4e
+        ptr =   strchr(ptr, '/');
21ab4e
+        brptr = strstr(ptr, "--brick-name");
21ab4e
+        i = 0;
21ab4e
+
21ab4e
+        while (ptr < brptr) {
21ab4e
+                if (*ptr != 32)
21ab4e
+                        tmpsockpath[i++] = *ptr;
21ab4e
+                ptr++;
21ab4e
+        }
21ab4e
+
21ab4e
+        if (tmpsockpath[0]) {
21ab4e
+                strncpy (sockpath, tmpsockpath , i);
21ab4e
+        }
21ab4e
+
21ab4e
+}
21ab4e
+
21ab4e
+
21ab4e
 int
21ab4e
 glusterd_brick_start (glusterd_volinfo_t *volinfo,
21ab4e
                       glusterd_brickinfo_t *brickinfo,
21ab4e
@@ -5285,7 +5356,6 @@ glusterd_brick_start (glusterd_volinfo_t *volinfo,
21ab4e
         glusterd_conf_t         *conf = NULL;
21ab4e
         int32_t                 pid                   = -1;
21ab4e
         char                    pidfile[PATH_MAX]     = {0};
21ab4e
-        FILE                    *fp;
21ab4e
         char                    socketpath[PATH_MAX]  = {0};
21ab4e
         glusterd_volinfo_t      *other_vol;
21ab4e
 
21ab4e
@@ -5339,8 +5409,16 @@ glusterd_brick_start (glusterd_volinfo_t *volinfo,
21ab4e
                          * same port (on another brick) and re-use that.
21ab4e
                          * TBD: re-use RPC connection across bricks
21ab4e
                          */
21ab4e
-                        glusterd_set_brick_socket_filepath (volinfo, brickinfo,
21ab4e
-                                        socketpath, sizeof (socketpath));
21ab4e
+                        if (is_brick_mx_enabled ())
21ab4e
+                                glusterd_get_sock_from_brick_pid (pid, socketpath,
21ab4e
+                                                                  sizeof(socketpath));
21ab4e
+                        else
21ab4e
+                                glusterd_set_brick_socket_filepath (volinfo, brickinfo,
21ab4e
+                                                                    socketpath,
21ab4e
+                                                                    sizeof (socketpath));
21ab4e
+                        gf_log (this->name, GF_LOG_DEBUG,
21ab4e
+                                "Using %s as sockfile for brick %s of volume %s ",
21ab4e
+                                socketpath, brickinfo->path, volinfo->volname);
21ab4e
                         (void) glusterd_brick_connect (volinfo, brickinfo,
21ab4e
                                         socketpath);
21ab4e
                 }
21ab4e
@@ -5379,12 +5457,6 @@ glusterd_brick_start (glusterd_volinfo_t *volinfo,
21ab4e
          *
21ab4e
          * TBD: pray for GlusterD 2 to be ready soon.
21ab4e
          */
21ab4e
-        (void) sys_unlink (pidfile);
21ab4e
-        fp = fopen (pidfile, "w+");
21ab4e
-        if (fp) {
21ab4e
-                (void) fprintf (fp, "0\n");
21ab4e
-                (void) fclose (fp);
21ab4e
-        }
21ab4e
 
21ab4e
         ret = glusterd_volume_start_glusterfs (volinfo, brickinfo, wait);
21ab4e
         if (ret) {
21ab4e
diff --git a/xlators/mgmt/glusterd/src/glusterd.c b/xlators/mgmt/glusterd/src/glusterd.c
21ab4e
index 7d66718..cba1e06 100644
21ab4e
--- a/xlators/mgmt/glusterd/src/glusterd.c
21ab4e
+++ b/xlators/mgmt/glusterd/src/glusterd.c
21ab4e
@@ -1876,9 +1876,6 @@ init (xlator_t *this)
21ab4e
         if (cds_list_empty (&conf->peers)) {
21ab4e
                 glusterd_launch_synctask (glusterd_spawn_daemons, NULL);
21ab4e
         }
21ab4e
-        ret = glusterd_options_init (this);
21ab4e
-        if (ret < 0)
21ab4e
-                goto out;
21ab4e
 
21ab4e
         ret = glusterd_handle_upgrade_downgrade (this->options, conf, upgrade,
21ab4e
                                                  downgrade);
21ab4e
diff --git a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h
21ab4e
index e3cc6ee..dde06bd 100644
21ab4e
--- a/xlators/mgmt/glusterd/src/glusterd.h
21ab4e
+++ b/xlators/mgmt/glusterd/src/glusterd.h
21ab4e
@@ -1227,4 +1227,8 @@ glusterd_remove_brick_migrate_cbk (glusterd_volinfo_t *volinfo,
21ab4e
 
21ab4e
 int
21ab4e
 __glusterd_handle_reset_brick (rpcsvc_request_t *req);
21ab4e
+
21ab4e
+
21ab4e
+int
21ab4e
+glusterd_options_init (xlator_t *this);
21ab4e
 #endif
21ab4e
diff --git a/xlators/storage/posix/src/posix-helpers.c b/xlators/storage/posix/src/posix-helpers.c
21ab4e
index dd6791c..276ba3c 100644
21ab4e
--- a/xlators/storage/posix/src/posix-helpers.c
21ab4e
+++ b/xlators/storage/posix/src/posix-helpers.c
21ab4e
@@ -51,6 +51,7 @@
21ab4e
 #include "hashfn.h"
21ab4e
 #include "glusterfs-acl.h"
21ab4e
 #include "events.h"
21ab4e
+#include <sys/types.h>
21ab4e
 
21ab4e
 char *marker_xattrs[] = {"trusted.glusterfs.quota.*",
21ab4e
                          "trusted.glusterfs.*.xtime",
21ab4e
@@ -1842,6 +1843,9 @@ posix_health_check_thread_proc (void *data)
21ab4e
         struct posix_private *priv               = NULL;
21ab4e
         uint32_t              interval           = 0;
21ab4e
         int                   ret                = -1;
21ab4e
+        xlator_t                *top             = NULL;
21ab4e
+        xlator_list_t           **trav_p         = NULL;
21ab4e
+        int                     count            = 0;
21ab4e
 
21ab4e
         this = data;
21ab4e
         priv = this->private;
21ab4e
@@ -1890,18 +1894,33 @@ abort:
21ab4e
 
21ab4e
         xlator_notify (this->parents->xlator, GF_EVENT_CHILD_DOWN, this);
21ab4e
 
21ab4e
-        ret = sleep (30);
21ab4e
-        if (ret == 0) {
21ab4e
+        /* Below code is use to ensure if brick multiplexing is enabled if
21ab4e
+           count is more than 1 it means brick mux has enabled
21ab4e
+        */
21ab4e
+        if (this->ctx->active) {
21ab4e
+                top = this->ctx->active->first;
21ab4e
+                for (trav_p = &top->children; *trav_p;
21ab4e
+                                               trav_p = &(*trav_p)->next) {
21ab4e
+                        count++;
21ab4e
+                }
21ab4e
+        }
21ab4e
+
21ab4e
+        if (count == 1) {
21ab4e
                 gf_msg (this->name, GF_LOG_EMERG, 0, P_MSG_HEALTHCHECK_FAILED,
21ab4e
                         "still alive! -> SIGTERM");
21ab4e
-                kill (getpid(), SIGTERM);
21ab4e
-        }
21ab4e
+                ret = sleep (30);
21ab4e
 
21ab4e
-        ret = sleep (30);
21ab4e
-        if (ret == 0) {
21ab4e
+                /* Need to kill the process only while brick mux has not enabled
21ab4e
+                */
21ab4e
+                if (ret == 0)
21ab4e
+                        kill (getpid(), SIGTERM);
21ab4e
+
21ab4e
+                ret = sleep (30);
21ab4e
                 gf_msg (this->name, GF_LOG_EMERG, 0, P_MSG_HEALTHCHECK_FAILED,
21ab4e
                         "still alive! -> SIGKILL");
21ab4e
-                kill (getpid(), SIGKILL);
21ab4e
+                if (ret == 0)
21ab4e
+                        kill (getpid(), SIGKILL);
21ab4e
+
21ab4e
         }
21ab4e
 
21ab4e
         return NULL;
21ab4e
-- 
21ab4e
1.8.3.1
21ab4e