14f8ab
From 3ddf12d0710e048878fcf8786d05efe18710c74c Mon Sep 17 00:00:00 2001
14f8ab
From: karthik-us <ksubrahm@redhat.com>
14f8ab
Date: Fri, 12 Jul 2019 16:44:20 +0530
14f8ab
Subject: [PATCH 232/255] cluster/afr: Fix incorrect reporting of gfid & type
14f8ab
 mismatch
14f8ab
14f8ab
Backport of: https://review.gluster.org/#/c/glusterfs/+/22908/
14f8ab
14f8ab
Problems:
14f8ab
1. When checking for type and gfid mismatch, if the type or gfid
14f8ab
is unknown because the gfid handle and the gfid xattr are missing,
14f8ab
it will be reported as type or gfid mismatch and the heal will
14f8ab
not complete.
14f8ab
14f8ab
2. If the source selected during entry heal has a null gfid, the same
14f8ab
will be sent to afr_lookup_and_heal_gfid(). In this function when
14f8ab
we try to assign the gfid on the bricks where it does not exist,
14f8ab
we are considering the same gfid and try to assign that on those
14f8ab
bricks. This will fail in posix_gfid_set() since the gfid sent
14f8ab
is null.
14f8ab
14f8ab
Fix:
14f8ab
If the gfid sent to afr_lookup_and_heal_gfid() is null, choose a
14f8ab
valid gfid before proceeding to assign the gfid on the bricks
14f8ab
where it is missing.
14f8ab
14f8ab
In afr_selfheal_detect_gfid_and_type_mismatch(), do not report
14f8ab
type/gfid mismatch if the type/gfid is unknown or not set.
14f8ab
14f8ab
Change-Id: Icdb4967c09a48e0a3a64ce4948d5fb0a06d7a7af
14f8ab
fixes: bz#1715447
14f8ab
Signed-off-by: karthik-us <ksubrahm@redhat.com>
14f8ab
Reviewed-on: https://code.engineering.redhat.com/gerrit/175966
14f8ab
Tested-by: RHGS Build Bot <nigelb@redhat.com>
14f8ab
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
14f8ab
---
14f8ab
 .../bug-1722507-type-mismatch-error-handling.t     | 116 +++++++++++++++++++++
14f8ab
 xlators/cluster/afr/src/afr-self-heal-common.c     |  12 ++-
14f8ab
 xlators/cluster/afr/src/afr-self-heal-entry.c      |  13 +++
14f8ab
 3 files changed, 139 insertions(+), 2 deletions(-)
14f8ab
 create mode 100644 tests/bugs/replicate/bug-1722507-type-mismatch-error-handling.t
14f8ab
14f8ab
diff --git a/tests/bugs/replicate/bug-1722507-type-mismatch-error-handling.t b/tests/bugs/replicate/bug-1722507-type-mismatch-error-handling.t
14f8ab
new file mode 100644
14f8ab
index 0000000..0aeaaaf
14f8ab
--- /dev/null
14f8ab
+++ b/tests/bugs/replicate/bug-1722507-type-mismatch-error-handling.t
14f8ab
@@ -0,0 +1,116 @@
14f8ab
+#!/bin/bash
14f8ab
+
14f8ab
+. $(dirname $0)/../../include.rc
14f8ab
+. $(dirname $0)/../../volume.rc
14f8ab
+. $(dirname $0)/../../afr.rc
14f8ab
+
14f8ab
+cleanup;
14f8ab
+
14f8ab
+## Start and create a volume
14f8ab
+TEST glusterd;
14f8ab
+TEST pidof glusterd;
14f8ab
+TEST $CLI volume info;
14f8ab
+
14f8ab
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2};
14f8ab
+TEST $CLI volume start $V0;
14f8ab
+TEST $CLI volume set $V0 cluster.heal-timeout 5
14f8ab
+TEST $CLI volume heal $V0 disable
14f8ab
+EXPECT 'Started' volinfo_field $V0 'Status';
14f8ab
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
14f8ab
+
14f8ab
+TEST mkdir $M0/dir
14f8ab
+
14f8ab
+##########################################################################################
14f8ab
+# GFID link file and the GFID is missing on one brick and all the bricks are being blamed.
14f8ab
+
14f8ab
+TEST touch $M0/dir/file
14f8ab
+#TEST kill_brick $V0 $H0 $B0/$V0"1"
14f8ab
+
14f8ab
+#B2 blames B0, and B0 blames B1 and B2, so every brick is blamed
14f8ab
+setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000000000001 $B0/$V0"2"/dir
14f8ab
+setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000000000001 $B0/$V0"0"/dir
14f8ab
+setfattr -n trusted.afr.$V0-client-2 -v 0x000000000000000000000001 $B0/$V0"0"/dir
14f8ab
+
14f8ab
+# Add entry to xattrop dir to trigger index heal.
14f8ab
+xattrop_dir0=$(afr_get_index_path $B0/$V0"0")
14f8ab
+base_entry_b0=`ls $xattrop_dir0`
14f8ab
+gfid_str=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/$V0"0"/dir/))
14f8ab
+ln -s $xattrop_dir0/$base_entry_b0 $xattrop_dir0/$gfid_str
14f8ab
+EXPECT "^1$" get_pending_heal_count $V0
14f8ab
+
14f8ab
+# Remove the gfid xattr and the link file on one brick.
14f8ab
+gfid_file=$(gf_get_gfid_xattr $B0/$V0"0"/dir/file)
14f8ab
+gfid_str_file=$(gf_gfid_xattr_to_str $gfid_file)
14f8ab
+TEST setfattr -x trusted.gfid $B0/${V0}0/dir/file
14f8ab
+TEST rm -f $B0/${V0}0/.glusterfs/${gfid_str_file:0:2}/${gfid_str_file:2:2}/$gfid_str_file
14f8ab
+
14f8ab
+# Launch heal
14f8ab
+TEST $CLI volume heal $V0 enable
14f8ab
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^Y$" glustershd_up_status
14f8ab
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 0
14f8ab
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 1
14f8ab
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 2
14f8ab
+
14f8ab
+# Wait for 2 seconds to force posix to consider that this is a valid file but
14f8ab
+# without gfid.
14f8ab
+sleep 2
14f8ab
+TEST $CLI volume heal $V0
14f8ab
+
14f8ab
+# Heal should not fail as the file is missing gfid xattr and the link file,
14f8ab
+# which is not actually a gfid or type mismatch.
14f8ab
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
14f8ab
+
14f8ab
+EXPECT "$gfid_file" gf_get_gfid_xattr $B0/${V0}0/dir/file
14f8ab
+TEST stat $B0/${V0}0/.glusterfs/${gfid_str_file:0:2}/${gfid_str_file:2:2}/$gfid_str_file
14f8ab
+rm -f $M0/dir/file
14f8ab
+
14f8ab
+
14f8ab
+###########################################################################################
14f8ab
+# GFID link file and the GFID is missing on two bricks and all the bricks are being blamed.
14f8ab
+
14f8ab
+TEST $CLI volume heal $V0 disable
14f8ab
+TEST touch $M0/dir/file
14f8ab
+#TEST kill_brick $V0 $H0 $B0/$V0"1"
14f8ab
+
14f8ab
+#B2 blames B0, and B0 blames B1 and B2, so every brick is blamed
14f8ab
+setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000000000001 $B0/$V0"2"/dir
14f8ab
+setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000000000001 $B0/$V0"0"/dir
14f8ab
+setfattr -n trusted.afr.$V0-client-2 -v 0x000000000000000000000001 $B0/$V0"0"/dir
14f8ab
+
14f8ab
+# Add entry to xattrop dir to trigger index heal.
14f8ab
+xattrop_dir0=$(afr_get_index_path $B0/$V0"0")
14f8ab
+base_entry_b0=`ls $xattrop_dir0`
14f8ab
+gfid_str=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/$V0"0"/dir/))
14f8ab
+ln -s $xattrop_dir0/$base_entry_b0 $xattrop_dir0/$gfid_str
14f8ab
+EXPECT "^1$" get_pending_heal_count $V0
14f8ab
+
14f8ab
+# Remove the gfid xattr and the link file on two bricks.
14f8ab
+gfid_file=$(gf_get_gfid_xattr $B0/$V0"0"/dir/file)
14f8ab
+gfid_str_file=$(gf_gfid_xattr_to_str $gfid_file)
14f8ab
+TEST setfattr -x trusted.gfid $B0/${V0}0/dir/file
14f8ab
+TEST rm -f $B0/${V0}0/.glusterfs/${gfid_str_file:0:2}/${gfid_str_file:2:2}/$gfid_str_file
14f8ab
+TEST setfattr -x trusted.gfid $B0/${V0}1/dir/file
14f8ab
+TEST rm -f $B0/${V0}1/.glusterfs/${gfid_str_file:0:2}/${gfid_str_file:2:2}/$gfid_str_file
14f8ab
+
14f8ab
+# Launch heal
14f8ab
+TEST $CLI volume heal $V0 enable
14f8ab
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^Y$" glustershd_up_status
14f8ab
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 0
14f8ab
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 1
14f8ab
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 2
14f8ab
+
14f8ab
+# Wait for 2 seconds to force posix to consider that this is a valid file but
14f8ab
+# without gfid.
14f8ab
+sleep 2
14f8ab
+TEST $CLI volume heal $V0
14f8ab
+
14f8ab
+# Heal should not fail as the file is missing gfid xattr and the link file,
14f8ab
+# which is not actually a gfid or type mismatch.
14f8ab
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
14f8ab
+
14f8ab
+EXPECT "$gfid_file" gf_get_gfid_xattr $B0/${V0}0/dir/file
14f8ab
+TEST stat $B0/${V0}0/.glusterfs/${gfid_str_file:0:2}/${gfid_str_file:2:2}/$gfid_str_file
14f8ab
+EXPECT "$gfid_file" gf_get_gfid_xattr $B0/${V0}1/dir/file
14f8ab
+TEST stat $B0/${V0}1/.glusterfs/${gfid_str_file:0:2}/${gfid_str_file:2:2}/$gfid_str_file
14f8ab
+
14f8ab
+cleanup
14f8ab
diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c
14f8ab
index 5157e7d..b38085a 100644
14f8ab
--- a/xlators/cluster/afr/src/afr-self-heal-common.c
14f8ab
+++ b/xlators/cluster/afr/src/afr-self-heal-common.c
14f8ab
@@ -55,7 +55,8 @@ afr_lookup_and_heal_gfid(xlator_t *this, inode_t *parent, const char *name,
14f8ab
     for (i = 0; i < priv->child_count; i++) {
14f8ab
         if (source == -1) {
14f8ab
             /* case (a) above. */
14f8ab
-            if (replies[i].valid && replies[i].op_ret == 0) {
14f8ab
+            if (replies[i].valid && replies[i].op_ret == 0 &&
14f8ab
+                replies[i].poststat.ia_type != IA_INVAL) {
14f8ab
                 ia_type = replies[i].poststat.ia_type;
14f8ab
                 break;
14f8ab
             }
14f8ab
@@ -63,7 +64,8 @@ afr_lookup_and_heal_gfid(xlator_t *this, inode_t *parent, const char *name,
14f8ab
             /* case (b) above. */
14f8ab
             if (i == source)
14f8ab
                 continue;
14f8ab
-            if (sources[i] && replies[i].valid && replies[i].op_ret == 0) {
14f8ab
+            if (sources[i] && replies[i].valid && replies[i].op_ret == 0 &&
14f8ab
+                replies[i].poststat.ia_type != IA_INVAL) {
14f8ab
                 ia_type = replies[i].poststat.ia_type;
14f8ab
                 break;
14f8ab
             }
14f8ab
@@ -77,6 +79,12 @@ heal:
14f8ab
     for (i = 0; i < priv->child_count; i++) {
14f8ab
         if (!replies[i].valid || replies[i].op_ret != 0)
14f8ab
             continue;
14f8ab
+
14f8ab
+        if (gf_uuid_is_null(gfid) &&
14f8ab
+            !gf_uuid_is_null(replies[i].poststat.ia_gfid) &&
14f8ab
+            replies[i].poststat.ia_type == ia_type)
14f8ab
+            gfid = replies[i].poststat.ia_gfid;
14f8ab
+
14f8ab
         if (!gf_uuid_is_null(replies[i].poststat.ia_gfid) ||
14f8ab
             replies[i].poststat.ia_type != ia_type)
14f8ab
             continue;
14f8ab
diff --git a/xlators/cluster/afr/src/afr-self-heal-entry.c b/xlators/cluster/afr/src/afr-self-heal-entry.c
14f8ab
index a6890fa..e07b521 100644
14f8ab
--- a/xlators/cluster/afr/src/afr-self-heal-entry.c
14f8ab
+++ b/xlators/cluster/afr/src/afr-self-heal-entry.c
14f8ab
@@ -246,6 +246,19 @@ afr_selfheal_detect_gfid_and_type_mismatch(xlator_t *this,
14f8ab
         if (replies[i].op_ret != 0)
14f8ab
             continue;
14f8ab
 
14f8ab
+        if (gf_uuid_is_null(replies[i].poststat.ia_gfid))
14f8ab
+            continue;
14f8ab
+
14f8ab
+        if (replies[i].poststat.ia_type == IA_INVAL)
14f8ab
+            continue;
14f8ab
+
14f8ab
+        if (ia_type == IA_INVAL || gf_uuid_is_null(gfid)) {
14f8ab
+            src_idx = i;
14f8ab
+            ia_type = replies[src_idx].poststat.ia_type;
14f8ab
+            gfid = &replies[src_idx].poststat.ia_gfid;
14f8ab
+            continue;
14f8ab
+        }
14f8ab
+
14f8ab
         if (gf_uuid_compare(gfid, replies[i].poststat.ia_gfid) &&
14f8ab
             (ia_type == replies[i].poststat.ia_type)) {
14f8ab
             ret = afr_gfid_split_brain_source(this, replies, inode, pargfid,
14f8ab
-- 
14f8ab
1.8.3.1
14f8ab