887953
From cb4bf2bb3721e66c28a01be6ffff54a6f8610f0e Mon Sep 17 00:00:00 2001
887953
From: Sanju Rakonde <srakonde@redhat.com>
887953
Date: Tue, 25 Sep 2018 23:36:48 +0530
887953
Subject: [PATCH 386/387] glusterd: make sure that brickinfo->uuid is not null
887953
887953
Problem: An upgrade from the version where shared-brick-count
887953
option is not present to a version which introduced this option
887953
causes issue at the mount point i.e, size of the volume at mount
887953
point will be reduced by shared-brick-count value times.
887953
887953
Cause: shared-brick-count is equal to the number of bricks that
887953
are sharing the file system. gd_set_shared_brick_count() calculates
887953
the shared-brick-count value based on uuid of the node and fsid of
887953
the brick. https://review.gluster.org/#/c/glusterfs/+/19484 handles
887953
setting of fsid properly during an upgrade path. This patch assumed
887953
that when the code path is reached, brickinfo->uuid is non-null.
887953
But brickinfo->uuid is null for all the bricks. As the uuid is null,
887953
https://review.gluster.org/#/c/glusterfs/+/19484 couldn't reach the
887953
code path to set the fsid for bricks. So, we had fsid as 0 for all
887953
bricks, which caused gd_set_shared_brick_count() to calculate
887953
shared-brick-count in a wrong way. i.e, the logic written in
887953
gd_set_shared_brick_count() didn't work as expected since fsid is 0.
887953
887953
Solution: Before control reaches the code path written by
887953
https://review.gluster.org/#/c/glusterfs/+/19484,
887953
adding a check for whether brickinfo->uuid is null and
887953
if brickinfo->uuid is having null value, calling
887953
glusterd_resolve_brick will set the brickinfo->uuid to a
887953
proper value. When we have proper uuid, fsid for the bricks
887953
will be set properly and shared-brick-count value will be
887953
calculated correctly.
887953
887953
Please take a look at the bug https://bugzilla.redhat.com/show_bug.cgi?id=1632889
887953
for complete RCA
887953
887953
Steps followed to test the fix:
887953
1. Created a 2 node cluster, the cluster is running with binary
887953
which doesn't have shared-brick-count option
887953
2. Created a 2x(2+1) volume and started it
887953
3. Mounted the volume, checked size of volume using df
887953
4. Upgrade to a version where shared-brick-count is introduced
887953
(upgraded the nodes one by one i.e, stop the glusterd, upgrade the node
887953
and start the glusterd).
887953
5. after upgrading both the nodes, bumped up the cluster.op-version
887953
6. At mount point, df shows the correct size for volume.
887953
887953
upstream patch: https://review.gluster.org/#/c/glusterfs/+/21278/
887953
887953
> fixes: bz#1632889
887953
> Change-Id: Ib9f078aafb15e899a01086eae113270657ea916b
887953
> Signed-off-by: Sanju Rakonde <srakonde@redhat.com>
887953
887953
Change-Id: Ib9f078aafb15e899a01086eae113270657ea916b
887953
BUG: 1630997
887953
Signed-off-by: Sanju Rakonde <srakonde@redhat.com>
887953
Reviewed-on: https://code.engineering.redhat.com/gerrit/151321
887953
Tested-by: RHGS Build Bot <nigelb@redhat.com>
887953
Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
887953
---
887953
 xlators/mgmt/glusterd/src/glusterd-store.c | 3 ++-
887953
 1 file changed, 2 insertions(+), 1 deletion(-)
887953
887953
diff --git a/xlators/mgmt/glusterd/src/glusterd-store.c b/xlators/mgmt/glusterd/src/glusterd-store.c
887953
index 387e7e5..015f6c2 100644
887953
--- a/xlators/mgmt/glusterd/src/glusterd-store.c
887953
+++ b/xlators/mgmt/glusterd/src/glusterd-store.c
887953
@@ -2609,6 +2609,8 @@ glusterd_store_retrieve_bricks (glusterd_volinfo_t *volinfo)
887953
                  * snapshot or snapshot restored volume this would be done post
887953
                  * creating the brick mounts
887953
                  */
887953
+                if (gf_uuid_is_null(brickinfo->uuid))
887953
+                        (void)glusterd_resolve_brick(brickinfo);
887953
                 if (brickinfo->real_path[0] == '\0' && !volinfo->is_snap_volume
887953
                     && gf_uuid_is_null (volinfo->restored_from_snap)) {
887953
                         /* By now if the brick is a local brick then it will be
887953
@@ -2617,7 +2619,6 @@ glusterd_store_retrieve_bricks (glusterd_volinfo_t *volinfo)
887953
                          * with MY_UUID for realpath check. Hence do not handle
887953
                          * error
887953
                          */
887953
-                        (void)glusterd_resolve_brick (brickinfo);
887953
                         if (!gf_uuid_compare(brickinfo->uuid, MY_UUID)) {
887953
                                 if (!realpath (brickinfo->path, abspath)) {
887953
                                         gf_msg (this->name, GF_LOG_CRITICAL,
887953
-- 
887953
1.8.3.1
887953