From 53ecd916d5ef56e164228ba123b078d4b30bfa81 Mon Sep 17 00:00:00 2001
From: Mohit Agrawal <moagrawal@redhat.com>
Date: Thu, 12 Jul 2018 13:29:48 +0530
Subject: [PATCH 333/333] glusterd: Add multiple checks before attach/start a
 brick

Problem: In a brick-mux scenario glusterd sometimes fails to
         start/attach a brick, yet "gluster v status" still shows
         the brick as running.

Solution:
          1) To make sure the brick is running, look for the brick
             path in /proc/<pid>/fd; if the brick root is held open
             by the brick process, the brick stack has come up,
             otherwise it has not (see the command sketch after this
             list).
          2) Before starting/attaching a brick, check whether the
             brick root is actually mounted.
          3) While printing volume status, check that the brick is
             consumed by a brick process.
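
A minimal sketch of the check in 1), assuming a single glusterfsd
process and a hypothetical brick root /bricks/brick1:

        brick_pid=$(pgrep glusterfsd | head -n 1)
        ls -l /proc/$brick_pid/fd | grep -w /bricks/brick1 | grep -v ".glusterfs"
        # a matching fd means the brick stack is up; no match means the
        # brick root is not consumed by the brick process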

Test: The following procedure was used to verify the fix (the key
      commands are sketched after this list)
       1) Set up a brick-mux environment on a VM.
       2) Set a breakpoint in gdb in posix_health_check_thread_proc
          at the point where the GF_EVENT_CHILD_DOWN event is notified.
       3) Forcefully unmount any one brick path.
       4) Check "gluster v status"; it shows N/A for that brick.
       5) Try to start the volume with the force option; glusterd
          throws the message "No device available for mount brick".
       6) Mount the brick_root path again.
       7) Try to start the volume with the force option.
       8) The down brick is started successfully.
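
The key commands for steps 3)-7), assuming a hypothetical volume
test-vol whose unmounted brick root is /bricks/brick3 on device
/dev/vg0/brick3:

        umount -l /bricks/brick3
        gluster v status test-vol            # the brick shows N/A
        gluster v start test-vol force       # fails while the brick root is unmounted
        mount /dev/vg0/brick3 /bricks/brick3
        gluster v start test-vol force       # the down brick starts now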

> Change-Id: I91898dad21d082ebddd12aa0d1f7f0ed012bdf69
> fixes: bz#1595320
> (cherry picked from commit 9400b6f2c8aa219a493961e0ab9770b7f12e80d2)
> (Reviewed on upstream link https://review.gluster.org/#/c/20202/)

Change-Id: I62459910272754e4e062b2725fea2a1e68d743f1
BUG: 1589279
Signed-off-by: Mohit Agrawal <moagrawa@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/145269
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
---
 glusterfsd/src/glusterfsd-mgmt.c                |   3 +
 tests/basic/bug-1595320.t                       |  92 +++++++++
 tests/basic/posix/shared-statfs.t               |   2 +
 tests/bitrot/bug-1373520.t                      |   1 +
 tests/bugs/distribute/bug-1368012.t             |   2 +
 tests/bugs/distribute/bug-853258.t              |   1 +
 tests/bugs/quota/bug-1293601.t                  |   3 +-
 xlators/mgmt/glusterd/src/glusterd-snapshot.c   |   2 +-
 xlators/mgmt/glusterd/src/glusterd-utils.c      | 261 ++++++++++++++++++++----
 xlators/mgmt/glusterd/src/glusterd-utils.h      |   6 +-
 xlators/mgmt/glusterd/src/glusterd-volume-ops.c |   7 +-
 11 files changed, 329 insertions(+), 51 deletions(-)
 create mode 100644 tests/basic/bug-1595320.t

diff --git a/glusterfsd/src/glusterfsd-mgmt.c b/glusterfsd/src/glusterfsd-mgmt.c
index 30a717f..cbd436a 100644
--- a/glusterfsd/src/glusterfsd-mgmt.c
+++ b/glusterfsd/src/glusterfsd-mgmt.c
@@ -1010,6 +1010,9 @@ glusterfs_handle_attach (rpcsvc_request_t *req)
                                 "got attach for %s but no active graph",
                                 xlator_req.name);
                 }
+                if (ret) {
+                        ret = -1;
+                }
 
                 glusterfs_translator_info_response_send (req, ret, NULL, NULL);
 
diff --git a/tests/basic/bug-1595320.t b/tests/basic/bug-1595320.t
new file mode 100644
index 0000000..9d856ee
--- /dev/null
+++ b/tests/basic/bug-1595320.t
@@ -0,0 +1,92 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+. $(dirname $0)/../snapshot.rc
+
+cleanup
+
+function count_up_bricks {
+        $CLI --xml volume status $V0 | grep '<status>1' | wc -l
+}
+
+function count_brick_processes {
+        pgrep glusterfsd | wc -l
+}
+
+# Setup 3 LVMS
+LVM_PREFIX="test"
+TEST init_n_bricks 3
+TEST setup_lvm 3
+
+# Start glusterd
+TEST glusterd
+TEST pidof glusterd
+
+# Create volume and enable brick multiplexing
+TEST $CLI volume create $V0 $H0:$L1 $H0:$L2 $H0:$L3
+gluster v set all cluster.brick-multiplex on
+
+# Start the volume
+TEST $CLI volume start $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 3 count_up_bricks
+EXPECT 1 count_brick_processes
+
+# Kill volume ungracefully
+brick_pid=`pgrep glusterfsd`
+
+# Make sure every brick root should be consumed by a brick process
+n=`ls -lrth /proc/$brick_pid/fd | grep -iw $L1 | grep -v ".glusterfs" | wc -l`
+TEST [ $n -eq 1 ]
+n=`ls -lrth /proc/$brick_pid/fd | grep -iw $L2 | grep -v ".glusterfs" | wc -l`
+TEST [ $n -eq 1 ]
+n=`ls -lrth /proc/$brick_pid/fd | grep -iw $L3 | grep -v ".glusterfs" | wc -l`
+TEST [ $n -eq 1 ]
+
+b1_pid_file=$(ls $GLUSTERD_PIDFILEDIR/vols/$V0/*d-backends-1*.pid)
+b2_pid_file=$(ls $GLUSTERD_PIDFILEDIR/vols/$V0/*d-backends-2*.pid)
+b3_pid_file=$(ls $GLUSTERD_PIDFILEDIR/vols/$V0/*d-backends-3*.pid)
+
+kill -9 $brick_pid
+EXPECT 0 count_brick_processes
+
+# Unmount 3rd brick root from node
+brick_root=$L3
+TEST umount -l $brick_root 2>/dev/null
+
+# Start the volume only 2 brick should be start
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 2 count_up_bricks
+EXPECT 1 count_brick_processes
+
+brick_pid=`pgrep glusterfsd`
+
+# Make sure only two brick root should be consumed by a brick process
+n=`ls -lrth /proc/$brick_pid/fd | grep -iw $L1 | grep -v ".glusterfs" | wc -l`
+TEST [ $n -eq 1 ]
+n=`ls -lrth /proc/$brick_pid/fd | grep -iw $L2 | grep -v ".glusterfs" | wc -l`
+TEST [ $n -eq 1 ]
+n=`ls -lrth /proc/$brick_pid/fd | grep -iw $L3 | grep -v ".glusterfs" | wc -l`
+TEST [ $n -eq 0 ]
+
+# Mount the brick root
+TEST mount -t xfs -o nouuid  /dev/test_vg_3/brick_lvm $brick_root
+
+# Replace brick_pid file to test brick_attach code
+TEST cp $b1_pid_file $b3_pid_file
+
+# Start the volume all brick should be up
+TEST $CLI volume start $V0 force
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 3 count_up_bricks
+EXPECT 1 count_brick_processes
+
+# Make sure every brick root should be consumed by a brick process
+n=`ls -lrth /proc/$brick_pid/fd | grep -iw $L1 | grep -v ".glusterfs" | wc -l`
+TEST [ $n -eq 1 ]
+n=`ls -lrth /proc/$brick_pid/fd | grep -iw $L2 | grep -v ".glusterfs" | wc -l`
+TEST [ $n -eq 1 ]
+n=`ls -lrth /proc/$brick_pid/fd | grep -iw $L3 | grep -v ".glusterfs" | wc -l`
+TEST [ $n -eq 1 ]
+
+cleanup
diff --git a/tests/basic/posix/shared-statfs.t b/tests/basic/posix/shared-statfs.t
index 8caa9fa..3343956 100644
--- a/tests/basic/posix/shared-statfs.t
+++ b/tests/basic/posix/shared-statfs.t
@@ -23,6 +23,7 @@ TEST MOUNT_LOOP $LO2 $B0/${V0}2
 # Create a subdir in mountpoint and use that for volume.
 TEST $CLI volume create $V0 $H0:$B0/${V0}1/1 $H0:$B0/${V0}2/1;
 TEST $CLI volume start $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "2" online_brick_count
 TEST $GFS --volfile-server=$H0 --volfile-id=$V0 $M0
 total_space=$(df -P $M0 | tail -1 | awk '{ print $2}')
 # Keeping the size less than 200M mainly because XFS will use
@@ -38,6 +39,7 @@ EXPECT 'Stopped' volinfo_field $V0 'Status';
 TEST $CLI volume add-brick $V0 $H0:$B0/${V0}1/2 $H0:$B0/${V0}2/2 $H0:$B0/${V0}1/3 $H0:$B0/${V0}2/3
 
 TEST $CLI volume start $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "6" online_brick_count
 TEST $GFS --volfile-server=$H0 --volfile-id=$V0 $M0
 total_space=$(df -P $M0 | tail -1 | awk '{ print $2}')
 TEST [ $total_space -gt 194000 -a $total_space -lt 200000 ]
diff --git a/tests/bitrot/bug-1373520.t b/tests/bitrot/bug-1373520.t
index 225d3b1..c09d424 100644
--- a/tests/bitrot/bug-1373520.t
+++ b/tests/bitrot/bug-1373520.t
@@ -11,6 +11,7 @@ TEST pidof glusterd
 #Create a disperse volume
 TEST $CLI volume create $V0 disperse 6 redundancy 2 $H0:$B0/${V0}{0..5}
 TEST $CLI volume start $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "6" online_brick_count
 EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'Started' volinfo_field $V0 'Status'
 
 #Disable md-cache
diff --git a/tests/bugs/distribute/bug-1368012.t b/tests/bugs/distribute/bug-1368012.t
index f89314b..b861554 100644
--- a/tests/bugs/distribute/bug-1368012.t
+++ b/tests/bugs/distribute/bug-1368012.t
@@ -22,6 +22,7 @@ EXPECT "$V0" volinfo_field $V0 'Volume Name';
 EXPECT 'Created' volinfo_field $V0 'Status';
 ## Start volume and verify
 TEST $CLI volume start $V0;
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "2" online_brick_count
 TEST $CLI volume set $V0 performance.stat-prefetch off
 EXPECT 'Started' volinfo_field $V0 'Status';
 TEST glusterfs -s $H0 --volfile-id=$V0 $M0
@@ -36,6 +37,7 @@ TEST permission_root=`stat -c "%A" $M0`
 TEST echo $permission_root
 #Add-brick
 TEST $CLI volume add-brick $V0 $H0:/${V0}3
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "3" online_brick_count
 
 #Allow one lookup to happen
 TEST pushd $M0
diff --git a/tests/bugs/distribute/bug-853258.t b/tests/bugs/distribute/bug-853258.t
index e39f507..6817d9e 100755
--- a/tests/bugs/distribute/bug-853258.t
+++ b/tests/bugs/distribute/bug-853258.t
@@ -31,6 +31,7 @@ done
 
 # Expand the volume and force assignment of new ranges.
 TEST $CLI volume add-brick $V0 $H0:$B0/${V0}3
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "4" online_brick_count
 # Force assignment of initial ranges.
 TEST $CLI volume rebalance $V0 fix-layout start
 EXPECT_WITHIN $REBALANCE_TIMEOUT "fix-layout completed" fix-layout_status_field $V0
diff --git a/tests/bugs/quota/bug-1293601.t b/tests/bugs/quota/bug-1293601.t
index def4ef9..741758b 100644
--- a/tests/bugs/quota/bug-1293601.t
+++ b/tests/bugs/quota/bug-1293601.t
@@ -9,6 +9,7 @@ TEST glusterd
 
 TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{1,2,3,4}
 TEST $CLI volume start $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "4" online_brick_count
 TEST $CLI volume quota $V0 enable
 
 TEST glusterfs --volfile-server=$H0 --volfile-id=$V0 $M0;
@@ -27,6 +28,6 @@ EXPECT_WITHIN $MARKER_UPDATE_TIMEOUT "1.0MB" quotausage "/"
 TEST $CLI volume quota $V0 disable
 TEST $CLI volume quota $V0 enable
 
-EXPECT_WITHIN 40 "1.0MB" quotausage "/"
+EXPECT_WITHIN 60 "1.0MB" quotausage "/"
 
 cleanup;
diff --git a/xlators/mgmt/glusterd/src/glusterd-snapshot.c b/xlators/mgmt/glusterd/src/glusterd-snapshot.c
index 304cef6..09e10bf 100644
--- a/xlators/mgmt/glusterd/src/glusterd-snapshot.c
+++ b/xlators/mgmt/glusterd/src/glusterd-snapshot.c
@@ -2844,7 +2844,7 @@ glusterd_do_lvm_snapshot_remove (glusterd_volinfo_t *snap_vol,
         GLUSTERD_GET_BRICK_PIDFILE (pidfile, snap_vol, brickinfo, priv);
         if (gf_is_service_running (pidfile, &pid)) {
                 (void) send_attach_req (this, brickinfo->rpc,
-                                        brickinfo->path, NULL,
+                                        brickinfo->path, NULL, NULL,
                                         GLUSTERD_BRICK_TERMINATE);
                 brickinfo->status = GF_BRICK_STOPPED;
         }
diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c
index 95df889..fe9cc75 100644
--- a/xlators/mgmt/glusterd/src/glusterd-utils.c
+++ b/xlators/mgmt/glusterd/src/glusterd-utils.c
@@ -2186,7 +2186,7 @@ retry:
                 goto out;
         }
 
-        ret = glusterd_brick_process_add_brick (brickinfo, volinfo);
+        ret = glusterd_brick_process_add_brick (brickinfo);
         if (ret) {
                 gf_msg (this->name, GF_LOG_ERROR, 0,
                         GD_MSG_BRICKPROC_ADD_BRICK_FAILED, "Adding brick %s:%s "
@@ -2372,8 +2372,7 @@ out:
 }
 
 int
-glusterd_brick_process_add_brick (glusterd_brickinfo_t *brickinfo,
-                                  glusterd_volinfo_t *volinfo)
+glusterd_brick_process_add_brick (glusterd_brickinfo_t *brickinfo)
 {
         int                      ret = -1;
         xlator_t                *this = NULL;
@@ -2500,7 +2499,7 @@ glusterd_volume_stop_glusterfs (glusterd_volinfo_t *volinfo,
                                       brickinfo->hostname, brickinfo->path);
 
                         (void) send_attach_req (this, brickinfo->rpc,
-                                                brickinfo->path, NULL,
+                                                brickinfo->path, NULL, NULL,
                                                 GLUSTERD_BRICK_TERMINATE);
                 } else {
                         gf_msg_debug (this->name, 0, "About to stop glusterfsd"
@@ -5426,23 +5425,92 @@ static int32_t
 attach_brick_callback (struct rpc_req *req, struct iovec *iov, int count,
                        void *v_frame)
 {
-        call_frame_t    *frame  = v_frame;
-        glusterd_conf_t *conf   = frame->this->private;
-        glusterd_brickinfo_t *brickinfo = frame->local;
+        call_frame_t                     *frame     = v_frame;
+        glusterd_conf_t                  *conf      = frame->this->private;
+        glusterd_brickinfo_t             *brickinfo = frame->local;
+        glusterd_brickinfo_t             *other_brick = frame->cookie;
+        glusterd_volinfo_t               *volinfo   =  NULL;
+        xlator_t                         *this      = THIS;
+        int                               ret       = -1;
+        char                              pidfile1[PATH_MAX]      = {0};
+        char                              pidfile2[PATH_MAX]      = {0};
+        gf_getspec_rsp                    rsp   = {0,};
 
         frame->local = NULL;
-        brickinfo->port_registered = _gf_true;
+        frame->cookie = NULL;
+
+        ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf_getspec_rsp);
+        if (ret < 0) {
+                gf_log (frame->this->name, GF_LOG_ERROR, "XDR decoding error");
+                ret   = -1;
+                goto out;
+        }
+
+        ret =  glusterd_get_volinfo_from_brick (other_brick->path,
+                                                &volinfo);
+        if (ret) {
+                gf_msg (THIS->name, GF_LOG_ERROR, 0,
+                        GD_MSG_VOLINFO_GET_FAIL, "Failed to get volinfo"
+                        " from brick(%s) so  pidfile copying/unlink will fail",
+                        other_brick->path);
+                goto out;
+        }
+        GLUSTERD_GET_BRICK_PIDFILE (pidfile1, volinfo, other_brick, conf);
+        volinfo = NULL;
+
+        ret =  glusterd_get_volinfo_from_brick (brickinfo->path,
+                                                &volinfo);
+        if (ret) {
+                gf_msg (THIS->name, GF_LOG_ERROR, 0,
+                        GD_MSG_VOLINFO_GET_FAIL, "Failed to get volinfo"
+                        " from brick(%s) so  pidfile copying/unlink will fail",
+                        brickinfo->path);
+                goto out;
+        }
+        GLUSTERD_GET_BRICK_PIDFILE (pidfile2, volinfo, brickinfo, conf);
+
+        if (rsp.op_ret == 0) {
+                brickinfo->port_registered = _gf_true;
+
+                /* PID file is copied once brick has attached
+                   successfully
+                */
+                glusterd_copy_file (pidfile1, pidfile2);
+                brickinfo->status = GF_BRICK_STARTED;
+                brickinfo->rpc = rpc_clnt_ref (other_brick->rpc);
+                gf_log (THIS->name, GF_LOG_INFO, "brick %s is attached successfully",
+                        brickinfo->path);
+        } else {
+                gf_log (THIS->name, GF_LOG_INFO, "attach_brick failed pidfile"
+                        " is %s for brick_path %s", pidfile2, brickinfo->path);
+                brickinfo->port = 0;
+                brickinfo->status = GF_BRICK_STOPPED;
+                ret = glusterd_brick_process_remove_brick (brickinfo);
+                if (ret)
+                        gf_msg_debug (this->name, 0, "Couldn't remove brick from"
+                                      " brick process");
+                LOCK (&volinfo->lock);
+                ret = glusterd_store_volinfo (volinfo, GLUSTERD_VOLINFO_VER_AC_NONE);
+                UNLOCK (&volinfo->lock);
+                if (ret) {
+                       gf_msg (this->name, GF_LOG_ERROR, 0,
+                               GD_MSG_VOLINFO_SET_FAIL,
+                               "Failed to store volinfo of "
+                               "%s volume", volinfo->volname);
+                       goto out;
+                }
+        }
+out:
         synclock_lock (&conf->big_lock);
         --(conf->blockers);
         synclock_unlock (&conf->big_lock);
-
         STACK_DESTROY (frame->root);
         return 0;
 }
 
 int
 send_attach_req (xlator_t *this, struct rpc_clnt *rpc, char *path,
-                 glusterd_brickinfo_t *brickinfo, int op)
+                 glusterd_brickinfo_t *brickinfo, glusterd_brickinfo_t *other_brick, int op)
 {
         int                             ret      = -1;
         struct iobuf                    *iobuf    = NULL;
@@ -5516,6 +5584,7 @@ send_attach_req (xlator_t *this, struct rpc_clnt *rpc, char *path,
 
         if (op == GLUSTERD_BRICK_ATTACH) {
                 frame->local = brickinfo;
+                frame->cookie = other_brick;
                 cbkfn = attach_brick_callback;
         }
         /* Send the msg */
@@ -5582,27 +5651,19 @@ attach_brick (xlator_t *this,
                 rpc = rpc_clnt_ref (other_brick->rpc);
                 if (rpc) {
                         ret = send_attach_req (this, rpc, path, brickinfo,
+                                               other_brick,
                                                GLUSTERD_BRICK_ATTACH);
                         rpc_clnt_unref (rpc);
                         if (!ret) {
                                 ret = pmap_registry_extend (this, other_brick->port,
-                                                            brickinfo->path);
+                                            brickinfo->path);
                                 if (ret != 0) {
                                         gf_log (this->name, GF_LOG_ERROR,
                                                 "adding brick to process failed");
-                                        return ret;
+                                        goto out;
                                 }
-
-                                /* PID file is copied once brick has attached
-                                  successfully
-                                */
-                                glusterd_copy_file (pidfile1, pidfile2);
                                 brickinfo->port = other_brick->port;
-                                brickinfo->status = GF_BRICK_STARTED;
-                                brickinfo->rpc =
-                                        rpc_clnt_ref (other_brick->rpc);
-                                ret = glusterd_brick_process_add_brick (brickinfo,
-                                                                        volinfo);
+                                ret = glusterd_brick_process_add_brick (brickinfo);
                                 if (ret) {
                                         gf_msg (this->name, GF_LOG_ERROR, 0,
                                                 GD_MSG_BRICKPROC_ADD_BRICK_FAILED,
@@ -5611,29 +5672,23 @@ attach_brick (xlator_t *this,
                                                 brickinfo->path);
                                         return ret;
                                 }
-
-                                if (ret) {
-                                        gf_msg_debug (this->name, 0, "Add brick"
-                                                    " to brick process failed");
-                                        return ret;
-                                }
-
                                 return 0;
                         }
                 }
                 /*
-                 * It might not actually be safe to manipulate the lock like
-                 * this, but if we don't then the connection can never actually
-                 * complete and retries are useless.  Unfortunately, all of the
-                 * alternatives (e.g. doing all of this in a separate thread)
-                 * are much more complicated and risky.  TBD: see if there's a
-                 * better way
+                 * It might not actually be safe to manipulate the lock
+                 * like this, but if we don't then the connection can
+                 * never actually complete and retries are useless.
+                 * Unfortunately, all of the alternatives (e.g. doing
+                 * all of this in a separate thread) are much more
+                 * complicated and risky.
+                 * TBD: see if there's a better way
                  */
                 synclock_unlock (&conf->big_lock);
                 sleep (1);
                 synclock_lock (&conf->big_lock);
         }
-
+out:
         gf_log (this->name, GF_LOG_WARNING,
                 "attach failed for %s", brickinfo->path);
         return ret;
@@ -5855,6 +5910,7 @@ find_compatible_brick (glusterd_conf_t *conf,
         return NULL;
 }
 
+
 /* Below function is use to populate sockpath based on passed pid
    value as a argument after check the value from proc and also
    check if passed pid is match with running  glusterfs process
@@ -5941,6 +5997,62 @@ glusterd_get_sock_from_brick_pid (int pid, char *sockpath, size_t len)
 }
 
 
+char *
+search_brick_path_from_proc (pid_t brick_pid, char *brickpath)
+{
+        struct dirent *dp = NULL;
+        DIR *dirp = NULL;
+        size_t len = 0;
+        int fd = -1;
+        char path[PATH_MAX] = {0,};
+        char sym[PATH_MAX] = {0,};
+        struct dirent  scratch[2] = {{0,},};
+        char *brick_path = NULL;
+
+        if (!brickpath)
+                goto out;
+
+        sprintf(path, "/proc/%d/fd/", brick_pid);
+        dirp = sys_opendir (path);
+        if (!dirp)
+                goto out;
+
+        len = strlen (path);
+        if (len >= (sizeof(path) - 2))
+                goto out;
+
+        fd = dirfd (dirp);
+        if (fd  < 0)
+                goto out;
+
+        memset(path, 0, sizeof(path));
+        memset(sym, 0, sizeof(sym));
+
+        while ((dp = sys_readdir(dirp, scratch))) {
+                if (!strcmp(dp->d_name, ".") ||
+                    !strcmp(dp->d_name, ".."))
+                        continue;
+
+                /* check for non numerical descriptors */
+                if (!strtol(dp->d_name, (char **)NULL, 10))
+                        continue;
+
+                len = readlinkat (fd, dp->d_name, sym, sizeof(sym) - 1);
+                if (len > 1) {
+                        sym[len] = '\0';
+                        if (!strcmp (sym, brickpath)) {
+                                brick_path = gf_strdup(sym);
+                                break;
+                        }
+                        memset (sym, 0, sizeof (sym));
+                }
+        }
+out:
+        sys_closedir(dirp);
+        return brick_path;
+}
+
+
 int
 glusterd_brick_start (glusterd_volinfo_t *volinfo,
                       glusterd_brickinfo_t *brickinfo,
@@ -5954,7 +6066,9 @@ glusterd_brick_start (glusterd_volinfo_t *volinfo,
         int32_t                 pid                   = -1;
         char                    pidfile[PATH_MAX]     = {0};
         char                    socketpath[PATH_MAX]  = {0};
+        char                    *brickpath            = NULL;
         glusterd_volinfo_t      *other_vol;
+        struct statvfs           brickstat = {0,};
 
         this = THIS;
         GF_ASSERT (this);
@@ -6000,6 +6114,28 @@ glusterd_brick_start (glusterd_volinfo_t *volinfo,
                 brickinfo->start_triggered = _gf_true;
 
         GLUSTERD_GET_BRICK_PIDFILE (pidfile, volinfo, brickinfo, conf);
+
+        ret = sys_statvfs (brickinfo->path, &brickstat);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR,
+                        errno, GD_MSG_BRICKINFO_CREATE_FAIL,
+                        "failed to get statfs() call on brick %s",
+                        brickinfo->path);
+                goto out;
+        }
+
+        /* Compare fsid is helpful to ensure the existence of a brick_root
+           path before the start/attach a brick
+        */
+        if (brickinfo->statfs_fsid &&
+            (brickinfo->statfs_fsid != brickstat.f_fsid)) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "fsid comparison is failed it means Brick root path"
+                        " %s is not created by glusterd, start/attach will also fail",
+                        brickinfo->path);
+                goto out;
+        }
+
         if (gf_is_service_running (pidfile, &pid)) {
                 if (brickinfo->status != GF_BRICK_STARTING &&
                     brickinfo->status != GF_BRICK_STARTED) {
@@ -6019,12 +6155,29 @@ glusterd_brick_start (glusterd_volinfo_t *volinfo,
                          * TBD: re-use RPC connection across bricks
                          */
                         if (is_brick_mx_enabled ()) {
+                                brickpath = search_brick_path_from_proc (pid, brickinfo->path);
+                                if (!brickpath) {
+                                        gf_log (this->name, GF_LOG_INFO,
+                                                "Either pid %d is not running or brick"
+                                                " path %s is not consumed so cleanup pidfile",
+                                                pid, brickinfo->path);
+                                        /* search brick is failed so unlink pidfile */
+                                        if (sys_access (pidfile , R_OK) == 0) {
+                                                sys_unlink (pidfile);
+                                        }
+                                        goto run;
+                                }
+                                GF_FREE (brickpath);
                                 ret = glusterd_get_sock_from_brick_pid (pid, socketpath,
                                                                         sizeof(socketpath));
                                 if (ret) {
-                                        gf_log (this->name, GF_LOG_DEBUG,
+                                        gf_log (this->name, GF_LOG_INFO,
                                                 "Either pid %d is not running or is not match"
                                                 " with any running brick process ", pid);
+                                        /* Fetch unix socket is failed so unlink pidfile */
+                                        if (sys_access (pidfile , R_OK) == 0) {
+                                                sys_unlink (pidfile);
+                                        }
                                         goto run;
                                 }
                         } else {
@@ -6039,7 +6192,7 @@ glusterd_brick_start (glusterd_volinfo_t *volinfo,
                         (void) glusterd_brick_connect (volinfo, brickinfo,
                                         socketpath);
 
-                        ret = glusterd_brick_process_add_brick (brickinfo, volinfo);
+                        ret = glusterd_brick_process_add_brick (brickinfo);
                         if (ret) {
                                 gf_msg (this->name, GF_LOG_ERROR, 0,
                                         GD_MSG_BRICKPROC_ADD_BRICK_FAILED,
@@ -6079,6 +6232,10 @@ run:
                 if (ret == 0) {
                         goto out;
                 }
+                /* Attach_brick is failed so unlink pidfile */
+                if (sys_access (pidfile , R_OK) == 0) {
+                        sys_unlink (pidfile);
+                }
         }
 
         /*
@@ -7063,14 +7220,15 @@ glusterd_add_brick_to_dict (glusterd_volinfo_t *volinfo,
                             dict_t  *dict, int32_t count)
 {
 
-        int             ret                   = -1;
-        int32_t         pid                   = -1;
-        char            key[1024]             = {0};
-        char            base_key[1024]        = {0};
-        char            pidfile[PATH_MAX]     = {0};
+        int              ret                  = -1;
+        int32_t          pid                  = -1;
+        char             key[1024]            = {0};
+        char             base_key[1024]       = {0};
+        char             pidfile[PATH_MAX]    = {0};
         xlator_t        *this                 = NULL;
         glusterd_conf_t *priv                 = NULL;
-        gf_boolean_t    brick_online          = _gf_false;
+        gf_boolean_t     brick_online         = _gf_false;
+        char            *brickpath            = NULL;
 
         GF_ASSERT (volinfo);
         GF_ASSERT (brickinfo);
@@ -7127,7 +7285,20 @@ glusterd_add_brick_to_dict (glusterd_volinfo_t *volinfo,
         if (glusterd_is_brick_started (brickinfo)) {
                 if (gf_is_service_running (pidfile, &pid) &&
                     brickinfo->port_registered) {
-                        brick_online = _gf_true;
+                        if (!is_brick_mx_enabled ()) {
+                                brick_online = _gf_true;
+                        } else {
+                                brickpath = search_brick_path_from_proc (pid, brickinfo->path);
+                                if (!brickpath) {
+                                        gf_log (this->name, GF_LOG_INFO,
+                                                "brick path %s is not consumed",
+                                                brickinfo->path);
+                                        brick_online = _gf_false;
+                                } else {
+                                        brick_online = _gf_true;
+                                        GF_FREE (brickpath);
+                                }
+                        }
                 } else {
                         pid = -1;
                 }
diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.h b/xlators/mgmt/glusterd/src/glusterd-utils.h
index 4c9561e..4835728 100644
--- a/xlators/mgmt/glusterd/src/glusterd-utils.h
+++ b/xlators/mgmt/glusterd/src/glusterd-utils.h
@@ -179,8 +179,7 @@ int32_t
 glusterd_resolve_brick (glusterd_brickinfo_t *brickinfo);
 
 int
-glusterd_brick_process_add_brick (glusterd_brickinfo_t *brickinfo,
-                                  glusterd_volinfo_t *volinfo);
+glusterd_brick_process_add_brick (glusterd_brickinfo_t *brickinfo);
 
 int
 glusterd_brick_process_remove_brick (glusterd_brickinfo_t *brickinfo);
@@ -200,7 +199,8 @@ glusterd_volume_stop_glusterfs (glusterd_volinfo_t *volinfo,
 
 int
 send_attach_req (xlator_t *this, struct rpc_clnt *rpc, char *path,
-                 glusterd_brickinfo_t *brick, int op);
+                 glusterd_brickinfo_t *brick,
+                 glusterd_brickinfo_t *other_brick, int op);
 
 glusterd_volinfo_t *
 glusterd_volinfo_ref (glusterd_volinfo_t *volinfo);
diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c
index e34d58a..8bb0b6d 100644
--- a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c
+++ b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c
@@ -2585,8 +2585,13 @@ glusterd_start_volume (glusterd_volinfo_t *volinfo, int flags,
         }
 
         glusterd_set_volume_status (volinfo, GLUSTERD_STATUS_STARTED);
-
+        /* Update volinfo on disk in critical section because
+           attach_brick_callback can also call store_volinfo for same
+           volume to update volinfo on disk
+        */
+        LOCK (&volinfo->lock);
         ret = glusterd_store_volinfo (volinfo, verincrement);
+        UNLOCK (&volinfo->lock);
         if (ret) {
                 gf_msg (this->name, GF_LOG_ERROR, 0,
                         GD_MSG_VOLINFO_SET_FAIL,
-- 
1.8.3.1