74b1de
From 3ddf12d0710e048878fcf8786d05efe18710c74c Mon Sep 17 00:00:00 2001
74b1de
From: karthik-us <ksubrahm@redhat.com>
74b1de
Date: Fri, 12 Jul 2019 16:44:20 +0530
74b1de
Subject: [PATCH 232/255] cluster/afr: Fix incorrect reporting of gfid & type
74b1de
 mismatch
74b1de
74b1de
Backport of: https://review.gluster.org/#/c/glusterfs/+/22908/
74b1de
74b1de
Problems:
74b1de
1. When checking for type and gfid mismatch, if the type or gfid
74b1de
is unknown because of missing gfid handle and the gfid xattr
74b1de
it will be reported as type or gfid mismatch and the heal will
74b1de
not complete.
74b1de
74b1de
2. If the source selected during entry heal has null gfid the same
74b1de
will be sent to afr_lookup_and_heal_gfid(). In this function when
74b1de
we try to assign the gfid on the bricks where it does not exist,
74b1de
we are considering the same gfid and try to assign that on those
74b1de
bricks. This will fail in posix_gfid_set() since the gfid sent
74b1de
is null.
74b1de
74b1de
Fix:
74b1de
If the gfid sent to afr_lookup_and_heal_gfid() is null choose a
74b1de
valid gfid before proceeding to assign the gfid on the bricks
74b1de
where it is missing.
74b1de
74b1de
In afr_selfheal_detect_gfid_and_type_mismatch(), do not report
74b1de
type/gfid mismatch if the type/gfid is unknown or not set.
74b1de
74b1de
Change-Id: Icdb4967c09a48e0a3a64ce4948d5fb0a06d7a7af
74b1de
fixes: bz#1715447
74b1de
Signed-off-by: karthik-us <ksubrahm@redhat.com>
74b1de
Reviewed-on: https://code.engineering.redhat.com/gerrit/175966
74b1de
Tested-by: RHGS Build Bot <nigelb@redhat.com>
74b1de
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
74b1de
---
74b1de
 .../bug-1722507-type-mismatch-error-handling.t     | 116 +++++++++++++++++++++
74b1de
 xlators/cluster/afr/src/afr-self-heal-common.c     |  12 ++-
74b1de
 xlators/cluster/afr/src/afr-self-heal-entry.c      |  13 +++
74b1de
 3 files changed, 139 insertions(+), 2 deletions(-)
74b1de
 create mode 100644 tests/bugs/replicate/bug-1722507-type-mismatch-error-handling.t
74b1de
74b1de
diff --git a/tests/bugs/replicate/bug-1722507-type-mismatch-error-handling.t b/tests/bugs/replicate/bug-1722507-type-mismatch-error-handling.t
74b1de
new file mode 100644
74b1de
index 0000000..0aeaaaf
74b1de
--- /dev/null
74b1de
+++ b/tests/bugs/replicate/bug-1722507-type-mismatch-error-handling.t
74b1de
@@ -0,0 +1,116 @@
74b1de
+#!/bin/bash
74b1de
+
74b1de
+. $(dirname $0)/../../include.rc
74b1de
+. $(dirname $0)/../../volume.rc
74b1de
+. $(dirname $0)/../../afr.rc
74b1de
+
74b1de
+cleanup;
74b1de
+
74b1de
+## Start and create a volume
74b1de
+TEST glusterd;
74b1de
+TEST pidof glusterd;
74b1de
+TEST $CLI volume info;
74b1de
+
74b1de
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2};
74b1de
+TEST $CLI volume start $V0;
74b1de
+TEST $CLI volume set $V0 cluster.heal-timeout 5
74b1de
+TEST $CLI volume heal $V0 disable
74b1de
+EXPECT 'Started' volinfo_field $V0 'Status';
74b1de
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
74b1de
+
74b1de
+TEST mkdir $M0/dir
74b1de
+
74b1de
+##########################################################################################
74b1de
+# GFID link file and the GFID is missing on one brick and all the bricks are being blamed.
74b1de
+
74b1de
+TEST touch $M0/dir/file
74b1de
+#TEST kill_brick $V0 $H0 $B0/$V0"1"
74b1de
+
74b1de
+#B0 and B2 must blame B1
74b1de
+setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000000000001 $B0/$V0"2"/dir
74b1de
+setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000000000001 $B0/$V0"0"/dir
74b1de
+setfattr -n trusted.afr.$V0-client-2 -v 0x000000000000000000000001 $B0/$V0"0"/dir
74b1de
+
74b1de
+# Add entry to xattrop dir to trigger index heal.
74b1de
+xattrop_dir0=$(afr_get_index_path $B0/$V0"0")
74b1de
+base_entry_b0=`ls $xattrop_dir0`
74b1de
+gfid_str=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/$V0"0"/dir/))
74b1de
+ln -s $xattrop_dir0/$base_entry_b0 $xattrop_dir0/$gfid_str
74b1de
+EXPECT "^1$" get_pending_heal_count $V0
74b1de
+
74b1de
+# Remove the gfid xattr and the link file on one brick.
74b1de
+gfid_file=$(gf_get_gfid_xattr $B0/$V0"0"/dir/file)
74b1de
+gfid_str_file=$(gf_gfid_xattr_to_str $gfid_file)
74b1de
+TEST setfattr -x trusted.gfid $B0/${V0}0/dir/file
74b1de
+TEST rm -f $B0/${V0}0/.glusterfs/${gfid_str_file:0:2}/${gfid_str_file:2:2}/$gfid_str_file
74b1de
+
74b1de
+# Launch heal
74b1de
+TEST $CLI volume heal $V0 enable
74b1de
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^Y$" glustershd_up_status
74b1de
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 0
74b1de
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 1
74b1de
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 2
74b1de
+
74b1de
+# Wait for 2 second to force posix to consider that this is a valid file but
74b1de
+# without gfid.
74b1de
+sleep 2
74b1de
+TEST $CLI volume heal $V0
74b1de
+
74b1de
+# Heal should not fail as the file is missing gfid xattr and the link file,
74b1de
+# which is not actually the gfid or type mismatch.
74b1de
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
74b1de
+
74b1de
+EXPECT "$gfid_file" gf_get_gfid_xattr $B0/${V0}0/dir/file
74b1de
+TEST stat $B0/${V0}0/.glusterfs/${gfid_str_file:0:2}/${gfid_str_file:2:2}/$gfid_str_file
74b1de
+rm -f $M0/dir/file
74b1de
+
74b1de
+
74b1de
+###########################################################################################
74b1de
+# GFID link file and the GFID is missing on two bricks and all the bricks are being blamed.
74b1de
+
74b1de
+TEST $CLI volume heal $V0 disable
74b1de
+TEST touch $M0/dir/file
74b1de
+#TEST kill_brick $V0 $H0 $B0/$V0"1"
74b1de
+
74b1de
+#B0 and B2 must blame B1
74b1de
+setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000000000001 $B0/$V0"2"/dir
74b1de
+setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000000000001 $B0/$V0"0"/dir
74b1de
+setfattr -n trusted.afr.$V0-client-2 -v 0x000000000000000000000001 $B0/$V0"0"/dir
74b1de
+
74b1de
+# Add entry to xattrop dir to trigger index heal.
74b1de
+xattrop_dir0=$(afr_get_index_path $B0/$V0"0")
74b1de
+base_entry_b0=`ls $xattrop_dir0`
74b1de
+gfid_str=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/$V0"0"/dir/))
74b1de
+ln -s $xattrop_dir0/$base_entry_b0 $xattrop_dir0/$gfid_str
74b1de
+EXPECT "^1$" get_pending_heal_count $V0
74b1de
+
74b1de
+# Remove the gfid xattr and the link file on two bricks.
74b1de
+gfid_file=$(gf_get_gfid_xattr $B0/$V0"0"/dir/file)
74b1de
+gfid_str_file=$(gf_gfid_xattr_to_str $gfid_file)
74b1de
+TEST setfattr -x trusted.gfid $B0/${V0}0/dir/file
74b1de
+TEST rm -f $B0/${V0}0/.glusterfs/${gfid_str_file:0:2}/${gfid_str_file:2:2}/$gfid_str_file
74b1de
+TEST setfattr -x trusted.gfid $B0/${V0}1/dir/file
74b1de
+TEST rm -f $B0/${V0}1/.glusterfs/${gfid_str_file:0:2}/${gfid_str_file:2:2}/$gfid_str_file
74b1de
+
74b1de
+# Launch heal
74b1de
+TEST $CLI volume heal $V0 enable
74b1de
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^Y$" glustershd_up_status
74b1de
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 0
74b1de
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 1
74b1de
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 2
74b1de
+
74b1de
+# Wait for 2 second to force posix to consider that this is a valid file but
74b1de
+# without gfid.
74b1de
+sleep 2
74b1de
+TEST $CLI volume heal $V0
74b1de
+
74b1de
+# Heal should not fail as the file is missing gfid xattr and the link file,
74b1de
+# which is not actually the gfid or type mismatch.
74b1de
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
74b1de
+
74b1de
+EXPECT "$gfid_file" gf_get_gfid_xattr $B0/${V0}0/dir/file
74b1de
+TEST stat $B0/${V0}0/.glusterfs/${gfid_str_file:0:2}/${gfid_str_file:2:2}/$gfid_str_file
74b1de
+EXPECT "$gfid_file" gf_get_gfid_xattr $B0/${V0}1/dir/file
74b1de
+TEST stat $B0/${V0}1/.glusterfs/${gfid_str_file:0:2}/${gfid_str_file:2:2}/$gfid_str_file
74b1de
+
74b1de
+cleanup
74b1de
diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c
74b1de
index 5157e7d..b38085a 100644
74b1de
--- a/xlators/cluster/afr/src/afr-self-heal-common.c
74b1de
+++ b/xlators/cluster/afr/src/afr-self-heal-common.c
74b1de
@@ -55,7 +55,8 @@ afr_lookup_and_heal_gfid(xlator_t *this, inode_t *parent, const char *name,
74b1de
     for (i = 0; i < priv->child_count; i++) {
74b1de
         if (source == -1) {
74b1de
             /* case (a) above. */
74b1de
-            if (replies[i].valid && replies[i].op_ret == 0) {
74b1de
+            if (replies[i].valid && replies[i].op_ret == 0 &&
74b1de
+                replies[i].poststat.ia_type != IA_INVAL) {
74b1de
                 ia_type = replies[i].poststat.ia_type;
74b1de
                 break;
74b1de
             }
74b1de
@@ -63,7 +64,8 @@ afr_lookup_and_heal_gfid(xlator_t *this, inode_t *parent, const char *name,
74b1de
             /* case (b) above. */
74b1de
             if (i == source)
74b1de
                 continue;
74b1de
-            if (sources[i] && replies[i].valid && replies[i].op_ret == 0) {
74b1de
+            if (sources[i] && replies[i].valid && replies[i].op_ret == 0 &&
74b1de
+                replies[i].poststat.ia_type != IA_INVAL) {
74b1de
                 ia_type = replies[i].poststat.ia_type;
74b1de
                 break;
74b1de
             }
74b1de
@@ -77,6 +79,12 @@ heal:
74b1de
     for (i = 0; i < priv->child_count; i++) {
74b1de
         if (!replies[i].valid || replies[i].op_ret != 0)
74b1de
             continue;
74b1de
+
74b1de
+        if (gf_uuid_is_null(gfid) &&
74b1de
+            !gf_uuid_is_null(replies[i].poststat.ia_gfid) &&
74b1de
+            replies[i].poststat.ia_type == ia_type)
74b1de
+            gfid = replies[i].poststat.ia_gfid;
74b1de
+
74b1de
         if (!gf_uuid_is_null(replies[i].poststat.ia_gfid) ||
74b1de
             replies[i].poststat.ia_type != ia_type)
74b1de
             continue;
74b1de
diff --git a/xlators/cluster/afr/src/afr-self-heal-entry.c b/xlators/cluster/afr/src/afr-self-heal-entry.c
74b1de
index a6890fa..e07b521 100644
74b1de
--- a/xlators/cluster/afr/src/afr-self-heal-entry.c
74b1de
+++ b/xlators/cluster/afr/src/afr-self-heal-entry.c
74b1de
@@ -246,6 +246,19 @@ afr_selfheal_detect_gfid_and_type_mismatch(xlator_t *this,
74b1de
         if (replies[i].op_ret != 0)
74b1de
             continue;
74b1de
 
74b1de
+        if (gf_uuid_is_null(replies[i].poststat.ia_gfid))
74b1de
+            continue;
74b1de
+
74b1de
+        if (replies[i].poststat.ia_type == IA_INVAL)
74b1de
+            continue;
74b1de
+
74b1de
+        if (ia_type == IA_INVAL || gf_uuid_is_null(gfid)) {
74b1de
+            src_idx = i;
74b1de
+            ia_type = replies[src_idx].poststat.ia_type;
74b1de
+            gfid = &replies[src_idx].poststat.ia_gfid;
74b1de
+            continue;
74b1de
+        }
74b1de
+
74b1de
         if (gf_uuid_compare(gfid, replies[i].poststat.ia_gfid) &&
74b1de
             (ia_type == replies[i].poststat.ia_type)) {
74b1de
             ret = afr_gfid_split_brain_source(this, replies, inode, pargfid,
74b1de
-- 
74b1de
1.8.3.1
74b1de