Blob Blame History Raw
From 4c7460554f2f7c6515b77e70c878fc15d3813237 Mon Sep 17 00:00:00 2001
From: Mohit Agrawal <moagrawal@redhat.com>
Date: Sat, 4 Aug 2018 12:05:03 +0530
Subject: [PATCH 344/351] glusterd: Compare volume_id before start/attach a
 brick

Problem: After a node reboot, a brick does not come up because
         the fsid comparison fails before the brick is started

Solution: Compare the volume_id instead of the fsid, because the
          fsid changes after a node reboot, whereas the volume_id
          persists as an xattr that is set on the brick_root path
          when the volume is created.

> Change-Id: Ic289aab1b4ebfd83bbcae8438fee26ae61a0fff4
> fixes: bz#1612418
> (Cherry pick from commit bd8fc26a278697c30537d879ea5402db7ebab577)
> (Reviewed on upstream link https://review.gluster.org/#/c/glusterfs/+/20638/)

Change-Id: Ia183c1d67af662584debcb301171c35be5ac0d9d
BUG: 1612098
Signed-off-by: Mohit Agrawal <moagrawal@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/146835
Tested-by: Mohit Agrawal <moagrawa@redhat.com>
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
---
 tests/{basic => bugs/glusterd}/bug-1595320.t |  9 +++---
 xlators/mgmt/glusterd/src/glusterd-utils.c   | 47 ++++++++++++++++------------
 2 files changed, 32 insertions(+), 24 deletions(-)
 rename tests/{basic => bugs/glusterd}/bug-1595320.t (94%)

diff --git a/tests/basic/bug-1595320.t b/tests/bugs/glusterd/bug-1595320.t
similarity index 94%
rename from tests/basic/bug-1595320.t
rename to tests/bugs/glusterd/bug-1595320.t
index 9d856ee..f41df9d 100644
--- a/tests/basic/bug-1595320.t
+++ b/tests/bugs/glusterd/bug-1595320.t
@@ -1,8 +1,8 @@
 #!/bin/bash
 
-. $(dirname $0)/../include.rc
-. $(dirname $0)/../volume.rc
-. $(dirname $0)/../snapshot.rc
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../snapshot.rc
 
 cleanup
 
@@ -52,7 +52,7 @@ EXPECT 0 count_brick_processes
 
 # Unmount 3rd brick root from node
 brick_root=$L3
-TEST umount -l $brick_root 2>/dev/null
+_umount_lv 3
 
 # Start the volume only 2 brick should be start
 TEST $CLI volume start $V0 force
@@ -70,6 +70,7 @@ n=`ls -lrth /proc/$brick_pid/fd | grep -iw $L3 | grep -v ".glusterfs" | wc -l`
 TEST [ $n -eq 0 ]
 
 # Mount the brick root
+TEST mkdir -p $brick_root
 TEST mount -t xfs -o nouuid  /dev/test_vg_3/brick_lvm $brick_root
 
 # Replace brick_pid file to test brick_attach code
diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c
index 6f7c787..1752425 100644
--- a/xlators/mgmt/glusterd/src/glusterd-utils.c
+++ b/xlators/mgmt/glusterd/src/glusterd-utils.c
@@ -5439,6 +5439,12 @@ attach_brick_callback (struct rpc_req *req, struct iovec *iov, int count,
         frame->local = NULL;
         frame->cookie = NULL;
 
+        if (!iov) {
+              gf_log (frame->this->name, GF_LOG_ERROR, "iov is NULL");
+              ret   = -1;
+              goto out;
+        }
+
         ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf_getspec_rsp);
         if (ret < 0) {
                 gf_log (frame->this->name, GF_LOG_ERROR, "XDR decoding error");
@@ -6073,17 +6079,19 @@ glusterd_brick_start (glusterd_volinfo_t *volinfo,
                       gf_boolean_t wait,
                       gf_boolean_t only_connect)
 {
-        int                     ret   = -1;
+        int                      ret   = -1;
         xlator_t                *this = NULL;
         glusterd_brickinfo_t    *other_brick;
         glusterd_conf_t         *conf = NULL;
-        int32_t                 pid                   = -1;
-        char                    pidfile[PATH_MAX]     = {0};
-        char                    socketpath[PATH_MAX]  = {0};
-        char                    *brickpath            = NULL;
+        int32_t                  pid                   = -1;
+        char                     pidfile[PATH_MAX]     = {0};
+        char                     socketpath[PATH_MAX]  = {0};
+        char                    *brickpath             = NULL;
         glusterd_volinfo_t      *other_vol;
-        struct statvfs           brickstat = {0,};
         gf_boolean_t             is_service_running = _gf_false;
+        uuid_t                   volid                 = {0,};
+        ssize_t                  size                  = -1;
+
 
         this = THIS;
         GF_ASSERT (this);
@@ -6130,24 +6138,23 @@ glusterd_brick_start (glusterd_volinfo_t *volinfo,
 
         GLUSTERD_GET_BRICK_PIDFILE (pidfile, volinfo, brickinfo, conf);
 
-        ret = sys_statvfs (brickinfo->path, &brickstat);
-        if (ret) {
-                gf_msg (this->name, GF_LOG_ERROR,
-                        errno, GD_MSG_BRICKINFO_CREATE_FAIL,
-                        "failed to get statfs() call on brick %s",
-                        brickinfo->path);
+        /* Compare volume-id xattr is helpful to ensure the existence of a brick_root
+           path before the start/attach a brick
+        */
+        size = sys_lgetxattr (brickinfo->path, GF_XATTR_VOL_ID_KEY, volid, 16);
+        if (size != 16) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "Missing %s extended attribute on brick root (%s),"
+                        " brick is deemed not to be a part of the volume (%s) ",
+                        GF_XATTR_VOL_ID_KEY, brickinfo->path, volinfo->volname);
                 goto out;
         }
 
-        /* Compare fsid is helpful to ensure the existence of a brick_root
-           path before the start/attach a brick
-        */
-        if (brickinfo->statfs_fsid &&
-            (brickinfo->statfs_fsid != brickstat.f_fsid)) {
+        if (strncmp (uuid_utoa (volinfo->volume_id), uuid_utoa(volid), GF_UUID_BUF_SIZE)) {
                 gf_log (this->name, GF_LOG_ERROR,
-                        "fsid comparison is failed it means Brick root path"
-                        " %s is not created by glusterd, start/attach will also fail",
-                        brickinfo->path);
+                        "Mismatching %s extended attribute on brick root (%s),"
+                        " brick is deemed not to be a part of the volume (%s)",
+                        GF_XATTR_VOL_ID_KEY, brickinfo->path, volinfo->volname);
                 goto out;
         }
         is_service_running = gf_is_service_running (pidfile, &pid);
-- 
1.8.3.1