50dc83
From 3ddf12d0710e048878fcf8786d05efe18710c74c Mon Sep 17 00:00:00 2001
50dc83
From: karthik-us <ksubrahm@redhat.com>
50dc83
Date: Fri, 12 Jul 2019 16:44:20 +0530
50dc83
Subject: [PATCH 232/255] cluster/afr: Fix incorrect reporting of gfid & type
50dc83
 mismatch
50dc83
50dc83
Backport of: https://review.gluster.org/#/c/glusterfs/+/22908/
50dc83
50dc83
Problems:
50dc83
1. When checking for type and gfid mismatch, if the type or gfid
50dc83
is unknown because of missing gfid handle and the gfid xattr
50dc83
it will be reported as type or gfid mismatch and the heal will
50dc83
not complete.
50dc83
50dc83
2. If the source selected during entry heal has null gfid the same
50dc83
will be sent to afr_lookup_and_heal_gfid(). In this function when
50dc83
we try to assign the gfid on the bricks where it does not exist,
50dc83
we are considering the same gfid and try to assign that on those
50dc83
bricks. This will fail in posix_gfid_set() since the gfid sent
50dc83
is null.
50dc83
50dc83
Fix:
50dc83
If the gfid sent to afr_lookup_and_heal_gfid() is null choose a
50dc83
valid gfid before proceeding to assign the gfid on the bricks
50dc83
where it is missing.
50dc83
50dc83
In afr_selfheal_detect_gfid_and_type_mismatch(), do not report
50dc83
type/gfid mismatch if the type/gfid is unknown or not set.
50dc83
50dc83
Change-Id: Icdb4967c09a48e0a3a64ce4948d5fb0a06d7a7af
50dc83
fixes: bz#1715447
50dc83
Signed-off-by: karthik-us <ksubrahm@redhat.com>
50dc83
Reviewed-on: https://code.engineering.redhat.com/gerrit/175966
50dc83
Tested-by: RHGS Build Bot <nigelb@redhat.com>
50dc83
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
50dc83
---
50dc83
 .../bug-1722507-type-mismatch-error-handling.t     | 116 +++++++++++++++++++++
50dc83
 xlators/cluster/afr/src/afr-self-heal-common.c     |  12 ++-
50dc83
 xlators/cluster/afr/src/afr-self-heal-entry.c      |  13 +++
50dc83
 3 files changed, 139 insertions(+), 2 deletions(-)
50dc83
 create mode 100644 tests/bugs/replicate/bug-1722507-type-mismatch-error-handling.t
50dc83
50dc83
diff --git a/tests/bugs/replicate/bug-1722507-type-mismatch-error-handling.t b/tests/bugs/replicate/bug-1722507-type-mismatch-error-handling.t
50dc83
new file mode 100644
50dc83
index 0000000..0aeaaaf
50dc83
--- /dev/null
50dc83
+++ b/tests/bugs/replicate/bug-1722507-type-mismatch-error-handling.t
50dc83
@@ -0,0 +1,116 @@
50dc83
+#!/bin/bash
50dc83
+
50dc83
+. $(dirname $0)/../../include.rc
50dc83
+. $(dirname $0)/../../volume.rc
50dc83
+. $(dirname $0)/../../afr.rc
50dc83
+
50dc83
+cleanup;
50dc83
+
50dc83
+## Start and create a volume
50dc83
+TEST glusterd;
50dc83
+TEST pidof glusterd;
50dc83
+TEST $CLI volume info;
50dc83
+
50dc83
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2};
50dc83
+TEST $CLI volume start $V0;
50dc83
+TEST $CLI volume set $V0 cluster.heal-timeout 5
50dc83
+TEST $CLI volume heal $V0 disable
50dc83
+EXPECT 'Started' volinfo_field $V0 'Status';
50dc83
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
50dc83
+
50dc83
+TEST mkdir $M0/dir
50dc83
+
50dc83
+##########################################################################################
50dc83
+# GFID link file and the GFID is missing on one brick and all the bricks are being blamed.
50dc83
+
50dc83
+TEST touch $M0/dir/file
50dc83
+#TEST kill_brick $V0 $H0 $B0/$V0"1"
50dc83
+
50dc83
+#B0 and B2 must blame B1
50dc83
+setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000000000001 $B0/$V0"2"/dir
50dc83
+setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000000000001 $B0/$V0"0"/dir
50dc83
+setfattr -n trusted.afr.$V0-client-2 -v 0x000000000000000000000001 $B0/$V0"0"/dir
50dc83
+
50dc83
+# Add entry to xattrop dir to trigger index heal.
50dc83
+xattrop_dir0=$(afr_get_index_path $B0/$V0"0")
50dc83
+base_entry_b0=`ls $xattrop_dir0`
50dc83
+gfid_str=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/$V0"0"/dir/))
50dc83
+ln -s $xattrop_dir0/$base_entry_b0 $xattrop_dir0/$gfid_str
50dc83
+EXPECT "^1$" get_pending_heal_count $V0
50dc83
+
50dc83
+# Remove the gfid xattr and the link file on one brick.
50dc83
+gfid_file=$(gf_get_gfid_xattr $B0/$V0"0"/dir/file)
50dc83
+gfid_str_file=$(gf_gfid_xattr_to_str $gfid_file)
50dc83
+TEST setfattr -x trusted.gfid $B0/${V0}0/dir/file
50dc83
+TEST rm -f $B0/${V0}0/.glusterfs/${gfid_str_file:0:2}/${gfid_str_file:2:2}/$gfid_str_file
50dc83
+
50dc83
+# Launch heal
50dc83
+TEST $CLI volume heal $V0 enable
50dc83
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^Y$" glustershd_up_status
50dc83
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 0
50dc83
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 1
50dc83
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 2
50dc83
+
50dc83
+# Wait for 2 second to force posix to consider that this is a valid file but
50dc83
+# without gfid.
50dc83
+sleep 2
50dc83
+TEST $CLI volume heal $V0
50dc83
+
50dc83
+# Heal should not fail as the file is missing gfid xattr and the link file,
50dc83
+# which is not actually the gfid or type mismatch.
50dc83
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
50dc83
+
50dc83
+EXPECT "$gfid_file" gf_get_gfid_xattr $B0/${V0}0/dir/file
50dc83
+TEST stat $B0/${V0}0/.glusterfs/${gfid_str_file:0:2}/${gfid_str_file:2:2}/$gfid_str_file
50dc83
+rm -f $M0/dir/file
50dc83
+
50dc83
+
50dc83
+###########################################################################################
50dc83
+# GFID link file and the GFID is missing on two bricks and all the bricks are being blamed.
50dc83
+
50dc83
+TEST $CLI volume heal $V0 disable
50dc83
+TEST touch $M0/dir/file
50dc83
+#TEST kill_brick $V0 $H0 $B0/$V0"1"
50dc83
+
50dc83
+#B0 and B2 must blame B1
50dc83
+setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000000000001 $B0/$V0"2"/dir
50dc83
+setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000000000001 $B0/$V0"0"/dir
50dc83
+setfattr -n trusted.afr.$V0-client-2 -v 0x000000000000000000000001 $B0/$V0"0"/dir
50dc83
+
50dc83
+# Add entry to xattrop dir to trigger index heal.
50dc83
+xattrop_dir0=$(afr_get_index_path $B0/$V0"0")
50dc83
+base_entry_b0=`ls $xattrop_dir0`
50dc83
+gfid_str=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/$V0"0"/dir/))
50dc83
+ln -s $xattrop_dir0/$base_entry_b0 $xattrop_dir0/$gfid_str
50dc83
+EXPECT "^1$" get_pending_heal_count $V0
50dc83
+
50dc83
+# Remove the gfid xattr and the link file on two bricks.
50dc83
+gfid_file=$(gf_get_gfid_xattr $B0/$V0"0"/dir/file)
50dc83
+gfid_str_file=$(gf_gfid_xattr_to_str $gfid_file)
50dc83
+TEST setfattr -x trusted.gfid $B0/${V0}0/dir/file
50dc83
+TEST rm -f $B0/${V0}0/.glusterfs/${gfid_str_file:0:2}/${gfid_str_file:2:2}/$gfid_str_file
50dc83
+TEST setfattr -x trusted.gfid $B0/${V0}1/dir/file
50dc83
+TEST rm -f $B0/${V0}1/.glusterfs/${gfid_str_file:0:2}/${gfid_str_file:2:2}/$gfid_str_file
50dc83
+
50dc83
+# Launch heal
50dc83
+TEST $CLI volume heal $V0 enable
50dc83
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^Y$" glustershd_up_status
50dc83
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 0
50dc83
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 1
50dc83
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 2
50dc83
+
50dc83
+# Wait for 2 second to force posix to consider that this is a valid file but
50dc83
+# without gfid.
50dc83
+sleep 2
50dc83
+TEST $CLI volume heal $V0
50dc83
+
50dc83
+# Heal should not fail as the file is missing gfid xattr and the link file,
50dc83
+# which is not actually the gfid or type mismatch.
50dc83
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
50dc83
+
50dc83
+EXPECT "$gfid_file" gf_get_gfid_xattr $B0/${V0}0/dir/file
50dc83
+TEST stat $B0/${V0}0/.glusterfs/${gfid_str_file:0:2}/${gfid_str_file:2:2}/$gfid_str_file
50dc83
+EXPECT "$gfid_file" gf_get_gfid_xattr $B0/${V0}1/dir/file
50dc83
+TEST stat $B0/${V0}1/.glusterfs/${gfid_str_file:0:2}/${gfid_str_file:2:2}/$gfid_str_file
50dc83
+
50dc83
+cleanup
50dc83
diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c
50dc83
index 5157e7d..b38085a 100644
50dc83
--- a/xlators/cluster/afr/src/afr-self-heal-common.c
50dc83
+++ b/xlators/cluster/afr/src/afr-self-heal-common.c
50dc83
@@ -55,7 +55,8 @@ afr_lookup_and_heal_gfid(xlator_t *this, inode_t *parent, const char *name,
50dc83
     for (i = 0; i < priv->child_count; i++) {
50dc83
         if (source == -1) {
50dc83
             /* case (a) above. */
50dc83
-            if (replies[i].valid && replies[i].op_ret == 0) {
50dc83
+            if (replies[i].valid && replies[i].op_ret == 0 &&
50dc83
+                replies[i].poststat.ia_type != IA_INVAL) {
50dc83
                 ia_type = replies[i].poststat.ia_type;
50dc83
                 break;
50dc83
             }
50dc83
@@ -63,7 +64,8 @@ afr_lookup_and_heal_gfid(xlator_t *this, inode_t *parent, const char *name,
50dc83
             /* case (b) above. */
50dc83
             if (i == source)
50dc83
                 continue;
50dc83
-            if (sources[i] && replies[i].valid && replies[i].op_ret == 0) {
50dc83
+            if (sources[i] && replies[i].valid && replies[i].op_ret == 0 &&
50dc83
+                replies[i].poststat.ia_type != IA_INVAL) {
50dc83
                 ia_type = replies[i].poststat.ia_type;
50dc83
                 break;
50dc83
             }
50dc83
@@ -77,6 +79,12 @@ heal:
50dc83
     for (i = 0; i < priv->child_count; i++) {
50dc83
         if (!replies[i].valid || replies[i].op_ret != 0)
50dc83
             continue;
50dc83
+
50dc83
+        if (gf_uuid_is_null(gfid) &&
50dc83
+            !gf_uuid_is_null(replies[i].poststat.ia_gfid) &&
50dc83
+            replies[i].poststat.ia_type == ia_type)
50dc83
+            gfid = replies[i].poststat.ia_gfid;
50dc83
+
50dc83
         if (!gf_uuid_is_null(replies[i].poststat.ia_gfid) ||
50dc83
             replies[i].poststat.ia_type != ia_type)
50dc83
             continue;
50dc83
diff --git a/xlators/cluster/afr/src/afr-self-heal-entry.c b/xlators/cluster/afr/src/afr-self-heal-entry.c
50dc83
index a6890fa..e07b521 100644
50dc83
--- a/xlators/cluster/afr/src/afr-self-heal-entry.c
50dc83
+++ b/xlators/cluster/afr/src/afr-self-heal-entry.c
50dc83
@@ -246,6 +246,19 @@ afr_selfheal_detect_gfid_and_type_mismatch(xlator_t *this,
50dc83
         if (replies[i].op_ret != 0)
50dc83
             continue;
50dc83
 
50dc83
+        if (gf_uuid_is_null(replies[i].poststat.ia_gfid))
50dc83
+            continue;
50dc83
+
50dc83
+        if (replies[i].poststat.ia_type == IA_INVAL)
50dc83
+            continue;
50dc83
+
50dc83
+        if (ia_type == IA_INVAL || gf_uuid_is_null(gfid)) {
50dc83
+            src_idx = i;
50dc83
+            ia_type = replies[src_idx].poststat.ia_type;
50dc83
+            gfid = &replies[src_idx].poststat.ia_gfid;
50dc83
+            continue;
50dc83
+        }
50dc83
+
50dc83
         if (gf_uuid_compare(gfid, replies[i].poststat.ia_gfid) &&
50dc83
             (ia_type == replies[i].poststat.ia_type)) {
50dc83
             ret = afr_gfid_split_brain_source(this, replies, inode, pargfid,
50dc83
-- 
50dc83
1.8.3.1
50dc83