d1681e
From 4c7460554f2f7c6515b77e70c878fc15d3813237 Mon Sep 17 00:00:00 2001
d1681e
From: Mohit Agrawal <moagrawal@redhat.com>
d1681e
Date: Sat, 4 Aug 2018 12:05:03 +0530
d1681e
Subject: [PATCH 344/351] glusterd: Compare volume_id before start/attach a
d1681e
 brick
d1681e
d1681e
Problem: After a node reboot, a brick does not come up because
d1681e
         the fsid comparison fails before the brick is started
d1681e
d1681e
Solution: Instead of comparing fsid, compare volume_id to
d1681e
          resolve this, because fsid changes after a node
d1681e
          reboot but volume_id persists as an xattr set
d1681e
          on the brick_root path at volume creation time.
d1681e
d1681e
> Change-Id: Ic289aab1b4ebfd83bbcae8438fee26ae61a0fff4
d1681e
> fixes: bz#1612418
d1681e
> (Cherry pick from commit bd8fc26a278697c30537d879ea5402db7ebab577)
d1681e
> (Reviewed on upstream link https://review.gluster.org/#/c/glusterfs/+/20638/)
d1681e
d1681e
Change-Id: Ia183c1d67af662584debcb301171c35be5ac0d9d
d1681e
BUG: 1612098
d1681e
Signed-off-by: Mohit Agrawal <moagrawal@redhat.com>
d1681e
Reviewed-on: https://code.engineering.redhat.com/gerrit/146835
d1681e
Tested-by: Mohit Agrawal <moagrawa@redhat.com>
d1681e
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
d1681e
---
d1681e
 tests/{basic => bugs/glusterd}/bug-1595320.t |  9 +++---
d1681e
 xlators/mgmt/glusterd/src/glusterd-utils.c   | 47 ++++++++++++++++------------
d1681e
 2 files changed, 32 insertions(+), 24 deletions(-)
d1681e
 rename tests/{basic => bugs/glusterd}/bug-1595320.t (94%)
d1681e
d1681e
diff --git a/tests/basic/bug-1595320.t b/tests/bugs/glusterd/bug-1595320.t
d1681e
similarity index 94%
d1681e
rename from tests/basic/bug-1595320.t
d1681e
rename to tests/bugs/glusterd/bug-1595320.t
d1681e
index 9d856ee..f41df9d 100644
d1681e
--- a/tests/basic/bug-1595320.t
d1681e
+++ b/tests/bugs/glusterd/bug-1595320.t
d1681e
@@ -1,8 +1,8 @@
d1681e
 #!/bin/bash
d1681e
 
d1681e
-. $(dirname $0)/../include.rc
d1681e
-. $(dirname $0)/../volume.rc
d1681e
-. $(dirname $0)/../snapshot.rc
d1681e
+. $(dirname $0)/../../include.rc
d1681e
+. $(dirname $0)/../../volume.rc
d1681e
+. $(dirname $0)/../../snapshot.rc
d1681e
 
d1681e
 cleanup
d1681e
 
d1681e
@@ -52,7 +52,7 @@ EXPECT 0 count_brick_processes
d1681e
 
d1681e
 # Unmount 3rd brick root from node
d1681e
 brick_root=$L3
d1681e
-TEST umount -l $brick_root 2>/dev/null
d1681e
+_umount_lv 3
d1681e
 
d1681e
 # Start the volume only 2 brick should be start
d1681e
 TEST $CLI volume start $V0 force
d1681e
@@ -70,6 +70,7 @@ n=`ls -lrth /proc/$brick_pid/fd | grep -iw $L3 | grep -v ".glusterfs" | wc -l`
d1681e
 TEST [ $n -eq 0 ]
d1681e
 
d1681e
 # Mount the brick root
d1681e
+TEST mkdir -p $brick_root
d1681e
 TEST mount -t xfs -o nouuid  /dev/test_vg_3/brick_lvm $brick_root
d1681e
 
d1681e
 # Replace brick_pid file to test brick_attach code
d1681e
diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c
d1681e
index 6f7c787..1752425 100644
d1681e
--- a/xlators/mgmt/glusterd/src/glusterd-utils.c
d1681e
+++ b/xlators/mgmt/glusterd/src/glusterd-utils.c
d1681e
@@ -5439,6 +5439,12 @@ attach_brick_callback (struct rpc_req *req, struct iovec *iov, int count,
d1681e
         frame->local = NULL;
d1681e
         frame->cookie = NULL;
d1681e
 
d1681e
+        if (!iov) {
d1681e
+              gf_log (frame->this->name, GF_LOG_ERROR, "iov is NULL");
d1681e
+              ret   = -1;
d1681e
+              goto out;
d1681e
+        }
d1681e
+
d1681e
         ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf_getspec_rsp);
d1681e
         if (ret < 0) {
d1681e
                 gf_log (frame->this->name, GF_LOG_ERROR, "XDR decoding error");
d1681e
@@ -6073,17 +6079,19 @@ glusterd_brick_start (glusterd_volinfo_t *volinfo,
d1681e
                       gf_boolean_t wait,
d1681e
                       gf_boolean_t only_connect)
d1681e
 {
d1681e
-        int                     ret   = -1;
d1681e
+        int                      ret   = -1;
d1681e
         xlator_t                *this = NULL;
d1681e
         glusterd_brickinfo_t    *other_brick;
d1681e
         glusterd_conf_t         *conf = NULL;
d1681e
-        int32_t                 pid                   = -1;
d1681e
-        char                    pidfile[PATH_MAX]     = {0};
d1681e
-        char                    socketpath[PATH_MAX]  = {0};
d1681e
-        char                    *brickpath            = NULL;
d1681e
+        int32_t                  pid                   = -1;
d1681e
+        char                     pidfile[PATH_MAX]     = {0};
d1681e
+        char                     socketpath[PATH_MAX]  = {0};
d1681e
+        char                    *brickpath             = NULL;
d1681e
         glusterd_volinfo_t      *other_vol;
d1681e
-        struct statvfs           brickstat = {0,};
d1681e
         gf_boolean_t             is_service_running = _gf_false;
d1681e
+        uuid_t                   volid                 = {0,};
d1681e
+        ssize_t                  size                  = -1;
d1681e
+
d1681e
 
d1681e
         this = THIS;
d1681e
         GF_ASSERT (this);
d1681e
@@ -6130,24 +6138,23 @@ glusterd_brick_start (glusterd_volinfo_t *volinfo,
d1681e
 
d1681e
         GLUSTERD_GET_BRICK_PIDFILE (pidfile, volinfo, brickinfo, conf);
d1681e
 
d1681e
-        ret = sys_statvfs (brickinfo->path, &brickstat);
d1681e
-        if (ret) {
d1681e
-                gf_msg (this->name, GF_LOG_ERROR,
d1681e
-                        errno, GD_MSG_BRICKINFO_CREATE_FAIL,
d1681e
-                        "failed to get statfs() call on brick %s",
d1681e
-                        brickinfo->path);
d1681e
+        /* Compare volume-id xattr is helpful to ensure the existence of a brick_root
d1681e
+           path before the start/attach a brick
d1681e
+        */
d1681e
+        size = sys_lgetxattr (brickinfo->path, GF_XATTR_VOL_ID_KEY, volid, 16);
d1681e
+        if (size != 16) {
d1681e
+                gf_log (this->name, GF_LOG_ERROR,
d1681e
+                        "Missing %s extended attribute on brick root (%s),"
d1681e
+                        " brick is deemed not to be a part of the volume (%s) ",
d1681e
+                        GF_XATTR_VOL_ID_KEY, brickinfo->path, volinfo->volname);
d1681e
                 goto out;
d1681e
         }
d1681e
 
d1681e
-        /* Compare fsid is helpful to ensure the existence of a brick_root
d1681e
-           path before the start/attach a brick
d1681e
-        */
d1681e
-        if (brickinfo->statfs_fsid &&
d1681e
-            (brickinfo->statfs_fsid != brickstat.f_fsid)) {
d1681e
+        if (strncmp (uuid_utoa (volinfo->volume_id), uuid_utoa(volid), GF_UUID_BUF_SIZE)) {
d1681e
                 gf_log (this->name, GF_LOG_ERROR,
d1681e
-                        "fsid comparison is failed it means Brick root path"
d1681e
-                        " %s is not created by glusterd, start/attach will also fail",
d1681e
-                        brickinfo->path);
d1681e
+                        "Mismatching %s extended attribute on brick root (%s),"
d1681e
+                        " brick is deemed not to be a part of the volume (%s)",
d1681e
+                        GF_XATTR_VOL_ID_KEY, brickinfo->path, volinfo->volname);
d1681e
                 goto out;
d1681e
         }
d1681e
         is_service_running = gf_is_service_running (pidfile, &pid);
d1681e
-- 
d1681e
1.8.3.1
d1681e