|
|
14f8ab |
From 3ddf12d0710e048878fcf8786d05efe18710c74c Mon Sep 17 00:00:00 2001
|
|
|
14f8ab |
From: karthik-us <ksubrahm@redhat.com>
|
|
|
14f8ab |
Date: Fri, 12 Jul 2019 16:44:20 +0530
|
|
|
14f8ab |
Subject: [PATCH 232/255] cluster/afr: Fix incorrect reporting of gfid & type
|
|
|
14f8ab |
mismatch
|
|
|
14f8ab |
|
|
|
14f8ab |
Backport of: https://review.gluster.org/#/c/glusterfs/+/22908/
|
|
|
14f8ab |
|
|
|
14f8ab |
Problems:
|
|
|
14f8ab |
1. When checking for type and gfid mismatch, if the type or gfid
|
|
|
14f8ab |
is unknown because the gfid handle and the gfid xattr are missing,
|
|
|
14f8ab |
it will be reported as type or gfid mismatch and the heal will
|
|
|
14f8ab |
not complete.
|
|
|
14f8ab |
|
|
|
14f8ab |
2. If the source selected during entry heal has null gfid the same
|
|
|
14f8ab |
will be sent to afr_lookup_and_heal_gfid(). In this function when
|
|
|
14f8ab |
we try to assign the gfid on the bricks where it does not exist,
|
|
|
14f8ab |
we use that same null gfid and try to assign it on those
|
|
|
14f8ab |
bricks. This will fail in posix_gfid_set() since the gfid sent
|
|
|
14f8ab |
is null.
|
|
|
14f8ab |
|
|
|
14f8ab |
Fix:
|
|
|
14f8ab |
If the gfid sent to afr_lookup_and_heal_gfid() is null, choose a
|
|
|
14f8ab |
valid gfid before proceeding to assign the gfid on the bricks
|
|
|
14f8ab |
where it is missing.
|
|
|
14f8ab |
|
|
|
14f8ab |
In afr_selfheal_detect_gfid_and_type_mismatch(), do not report
|
|
|
14f8ab |
type/gfid mismatch if the type/gfid is unknown or not set.
|
|
|
14f8ab |
|
|
|
14f8ab |
Change-Id: Icdb4967c09a48e0a3a64ce4948d5fb0a06d7a7af
|
|
|
14f8ab |
fixes: bz#1715447
|
|
|
14f8ab |
Signed-off-by: karthik-us <ksubrahm@redhat.com>
|
|
|
14f8ab |
Reviewed-on: https://code.engineering.redhat.com/gerrit/175966
|
|
|
14f8ab |
Tested-by: RHGS Build Bot <nigelb@redhat.com>
|
|
|
14f8ab |
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
|
|
|
14f8ab |
---
|
|
|
14f8ab |
.../bug-1722507-type-mismatch-error-handling.t | 116 +++++++++++++++++++++
|
|
|
14f8ab |
xlators/cluster/afr/src/afr-self-heal-common.c | 12 ++-
|
|
|
14f8ab |
xlators/cluster/afr/src/afr-self-heal-entry.c | 13 +++
|
|
|
14f8ab |
3 files changed, 139 insertions(+), 2 deletions(-)
|
|
|
14f8ab |
create mode 100644 tests/bugs/replicate/bug-1722507-type-mismatch-error-handling.t
|
|
|
14f8ab |
|
|
|
14f8ab |
diff --git a/tests/bugs/replicate/bug-1722507-type-mismatch-error-handling.t b/tests/bugs/replicate/bug-1722507-type-mismatch-error-handling.t
|
|
|
14f8ab |
new file mode 100644
|
|
|
14f8ab |
index 0000000..0aeaaaf
|
|
|
14f8ab |
--- /dev/null
|
|
|
14f8ab |
+++ b/tests/bugs/replicate/bug-1722507-type-mismatch-error-handling.t
|
|
|
14f8ab |
@@ -0,0 +1,116 @@
|
|
|
14f8ab |
+#!/bin/bash
|
|
|
14f8ab |
+
|
|
|
14f8ab |
+. $(dirname $0)/../../include.rc
|
|
|
14f8ab |
+. $(dirname $0)/../../volume.rc
|
|
|
14f8ab |
+. $(dirname $0)/../../afr.rc
|
|
|
14f8ab |
+
|
|
|
14f8ab |
+cleanup;
|
|
|
14f8ab |
+
|
|
|
14f8ab |
+## Start and create a volume
|
|
|
14f8ab |
+TEST glusterd;
|
|
|
14f8ab |
+TEST pidof glusterd;
|
|
|
14f8ab |
+TEST $CLI volume info;
|
|
|
14f8ab |
+
|
|
|
14f8ab |
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2};
|
|
|
14f8ab |
+TEST $CLI volume start $V0;
|
|
|
14f8ab |
+TEST $CLI volume set $V0 cluster.heal-timeout 5
|
|
|
14f8ab |
+TEST $CLI volume heal $V0 disable
|
|
|
14f8ab |
+EXPECT 'Started' volinfo_field $V0 'Status';
|
|
|
14f8ab |
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
|
|
|
14f8ab |
+
|
|
|
14f8ab |
+TEST mkdir $M0/dir
|
|
|
14f8ab |
+
|
|
|
14f8ab |
+##########################################################################################
|
|
|
14f8ab |
+# GFID link file and the GFID is missing on one brick and all the bricks are being blamed.
|
|
|
14f8ab |
+
|
|
|
14f8ab |
+TEST touch $M0/dir/file
|
|
|
14f8ab |
+#TEST kill_brick $V0 $H0 $B0/$V0"1"
|
|
|
14f8ab |
+
|
|
|
14f8ab |
+#B2 blames B0, and B0 blames B1 and B2, so all the bricks are blamed
|
|
|
14f8ab |
+setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000000000001 $B0/$V0"2"/dir
|
|
|
14f8ab |
+setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000000000001 $B0/$V0"0"/dir
|
|
|
14f8ab |
+setfattr -n trusted.afr.$V0-client-2 -v 0x000000000000000000000001 $B0/$V0"0"/dir
|
|
|
14f8ab |
+
|
|
|
14f8ab |
+# Add entry to xattrop dir to trigger index heal.
|
|
|
14f8ab |
+xattrop_dir0=$(afr_get_index_path $B0/$V0"0")
|
|
|
14f8ab |
+base_entry_b0=`ls $xattrop_dir0`
|
|
|
14f8ab |
+gfid_str=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/$V0"0"/dir/))
|
|
|
14f8ab |
+ln -s $xattrop_dir0/$base_entry_b0 $xattrop_dir0/$gfid_str
|
|
|
14f8ab |
+EXPECT "^1$" get_pending_heal_count $V0
|
|
|
14f8ab |
+
|
|
|
14f8ab |
+# Remove the gfid xattr and the link file on one brick.
|
|
|
14f8ab |
+gfid_file=$(gf_get_gfid_xattr $B0/$V0"0"/dir/file)
|
|
|
14f8ab |
+gfid_str_file=$(gf_gfid_xattr_to_str $gfid_file)
|
|
|
14f8ab |
+TEST setfattr -x trusted.gfid $B0/${V0}0/dir/file
|
|
|
14f8ab |
+TEST rm -f $B0/${V0}0/.glusterfs/${gfid_str_file:0:2}/${gfid_str_file:2:2}/$gfid_str_file
|
|
|
14f8ab |
+
|
|
|
14f8ab |
+# Launch heal
|
|
|
14f8ab |
+TEST $CLI volume heal $V0 enable
|
|
|
14f8ab |
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^Y$" glustershd_up_status
|
|
|
14f8ab |
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 0
|
|
|
14f8ab |
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 1
|
|
|
14f8ab |
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 2
|
|
|
14f8ab |
+
|
|
|
14f8ab |
+# Wait for 2 seconds to force posix to consider that this is a valid file but
|
|
|
14f8ab |
+# without gfid.
|
|
|
14f8ab |
+sleep 2
|
|
|
14f8ab |
+TEST $CLI volume heal $V0
|
|
|
14f8ab |
+
|
|
|
14f8ab |
+# Heal should not fail as the file is missing gfid xattr and the link file,
|
|
|
14f8ab |
+# which is not actually a gfid or type mismatch.
|
|
|
14f8ab |
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
|
|
|
14f8ab |
+
|
|
|
14f8ab |
+EXPECT "$gfid_file" gf_get_gfid_xattr $B0/${V0}0/dir/file
|
|
|
14f8ab |
+TEST stat $B0/${V0}0/.glusterfs/${gfid_str_file:0:2}/${gfid_str_file:2:2}/$gfid_str_file
|
|
|
14f8ab |
+rm -f $M0/dir/file
|
|
|
14f8ab |
+
|
|
|
14f8ab |
+
|
|
|
14f8ab |
+###########################################################################################
|
|
|
14f8ab |
+# GFID link file and the GFID is missing on two bricks and all the bricks are being blamed.
|
|
|
14f8ab |
+
|
|
|
14f8ab |
+TEST $CLI volume heal $V0 disable
|
|
|
14f8ab |
+TEST touch $M0/dir/file
|
|
|
14f8ab |
+#TEST kill_brick $V0 $H0 $B0/$V0"1"
|
|
|
14f8ab |
+
|
|
|
14f8ab |
+#B2 blames B0, and B0 blames B1 and B2, so all the bricks are blamed
|
|
|
14f8ab |
+setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000000000001 $B0/$V0"2"/dir
|
|
|
14f8ab |
+setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000000000001 $B0/$V0"0"/dir
|
|
|
14f8ab |
+setfattr -n trusted.afr.$V0-client-2 -v 0x000000000000000000000001 $B0/$V0"0"/dir
|
|
|
14f8ab |
+
|
|
|
14f8ab |
+# Add entry to xattrop dir to trigger index heal.
|
|
|
14f8ab |
+xattrop_dir0=$(afr_get_index_path $B0/$V0"0")
|
|
|
14f8ab |
+base_entry_b0=`ls $xattrop_dir0`
|
|
|
14f8ab |
+gfid_str=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/$V0"0"/dir/))
|
|
|
14f8ab |
+ln -s $xattrop_dir0/$base_entry_b0 $xattrop_dir0/$gfid_str
|
|
|
14f8ab |
+EXPECT "^1$" get_pending_heal_count $V0
|
|
|
14f8ab |
+
|
|
|
14f8ab |
+# Remove the gfid xattr and the link file on two bricks.
|
|
|
14f8ab |
+gfid_file=$(gf_get_gfid_xattr $B0/$V0"0"/dir/file)
|
|
|
14f8ab |
+gfid_str_file=$(gf_gfid_xattr_to_str $gfid_file)
|
|
|
14f8ab |
+TEST setfattr -x trusted.gfid $B0/${V0}0/dir/file
|
|
|
14f8ab |
+TEST rm -f $B0/${V0}0/.glusterfs/${gfid_str_file:0:2}/${gfid_str_file:2:2}/$gfid_str_file
|
|
|
14f8ab |
+TEST setfattr -x trusted.gfid $B0/${V0}1/dir/file
|
|
|
14f8ab |
+TEST rm -f $B0/${V0}1/.glusterfs/${gfid_str_file:0:2}/${gfid_str_file:2:2}/$gfid_str_file
|
|
|
14f8ab |
+
|
|
|
14f8ab |
+# Launch heal
|
|
|
14f8ab |
+TEST $CLI volume heal $V0 enable
|
|
|
14f8ab |
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^Y$" glustershd_up_status
|
|
|
14f8ab |
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 0
|
|
|
14f8ab |
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 1
|
|
|
14f8ab |
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 2
|
|
|
14f8ab |
+
|
|
|
14f8ab |
+# Wait for 2 seconds to force posix to consider that this is a valid file but
|
|
|
14f8ab |
+# without gfid.
|
|
|
14f8ab |
+sleep 2
|
|
|
14f8ab |
+TEST $CLI volume heal $V0
|
|
|
14f8ab |
+
|
|
|
14f8ab |
+# Heal should not fail as the file is missing gfid xattr and the link file,
|
|
|
14f8ab |
+# which is not actually a gfid or type mismatch.
|
|
|
14f8ab |
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
|
|
|
14f8ab |
+
|
|
|
14f8ab |
+EXPECT "$gfid_file" gf_get_gfid_xattr $B0/${V0}0/dir/file
|
|
|
14f8ab |
+TEST stat $B0/${V0}0/.glusterfs/${gfid_str_file:0:2}/${gfid_str_file:2:2}/$gfid_str_file
|
|
|
14f8ab |
+EXPECT "$gfid_file" gf_get_gfid_xattr $B0/${V0}1/dir/file
|
|
|
14f8ab |
+TEST stat $B0/${V0}1/.glusterfs/${gfid_str_file:0:2}/${gfid_str_file:2:2}/$gfid_str_file
|
|
|
14f8ab |
+
|
|
|
14f8ab |
+cleanup
|
|
|
14f8ab |
diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c
|
|
|
14f8ab |
index 5157e7d..b38085a 100644
|
|
|
14f8ab |
--- a/xlators/cluster/afr/src/afr-self-heal-common.c
|
|
|
14f8ab |
+++ b/xlators/cluster/afr/src/afr-self-heal-common.c
|
|
|
14f8ab |
@@ -55,7 +55,8 @@ afr_lookup_and_heal_gfid(xlator_t *this, inode_t *parent, const char *name,
|
|
|
14f8ab |
for (i = 0; i < priv->child_count; i++) {
|
|
|
14f8ab |
if (source == -1) {
|
|
|
14f8ab |
/* case (a) above. */
|
|
|
14f8ab |
- if (replies[i].valid && replies[i].op_ret == 0) {
|
|
|
14f8ab |
+ if (replies[i].valid && replies[i].op_ret == 0 &&
|
|
|
14f8ab |
+ replies[i].poststat.ia_type != IA_INVAL) {
|
|
|
14f8ab |
ia_type = replies[i].poststat.ia_type;
|
|
|
14f8ab |
break;
|
|
|
14f8ab |
}
|
|
|
14f8ab |
@@ -63,7 +64,8 @@ afr_lookup_and_heal_gfid(xlator_t *this, inode_t *parent, const char *name,
|
|
|
14f8ab |
/* case (b) above. */
|
|
|
14f8ab |
if (i == source)
|
|
|
14f8ab |
continue;
|
|
|
14f8ab |
- if (sources[i] && replies[i].valid && replies[i].op_ret == 0) {
|
|
|
14f8ab |
+ if (sources[i] && replies[i].valid && replies[i].op_ret == 0 &&
|
|
|
14f8ab |
+ replies[i].poststat.ia_type != IA_INVAL) {
|
|
|
14f8ab |
ia_type = replies[i].poststat.ia_type;
|
|
|
14f8ab |
break;
|
|
|
14f8ab |
}
|
|
|
14f8ab |
@@ -77,6 +79,12 @@ heal:
|
|
|
14f8ab |
for (i = 0; i < priv->child_count; i++) {
|
|
|
14f8ab |
if (!replies[i].valid || replies[i].op_ret != 0)
|
|
|
14f8ab |
continue;
|
|
|
14f8ab |
+
|
|
|
14f8ab |
+ if (gf_uuid_is_null(gfid) &&
|
|
|
14f8ab |
+ !gf_uuid_is_null(replies[i].poststat.ia_gfid) &&
|
|
|
14f8ab |
+ replies[i].poststat.ia_type == ia_type)
|
|
|
14f8ab |
+ gfid = replies[i].poststat.ia_gfid;
|
|
|
14f8ab |
+
|
|
|
14f8ab |
if (!gf_uuid_is_null(replies[i].poststat.ia_gfid) ||
|
|
|
14f8ab |
replies[i].poststat.ia_type != ia_type)
|
|
|
14f8ab |
continue;
|
|
|
14f8ab |
diff --git a/xlators/cluster/afr/src/afr-self-heal-entry.c b/xlators/cluster/afr/src/afr-self-heal-entry.c
|
|
|
14f8ab |
index a6890fa..e07b521 100644
|
|
|
14f8ab |
--- a/xlators/cluster/afr/src/afr-self-heal-entry.c
|
|
|
14f8ab |
+++ b/xlators/cluster/afr/src/afr-self-heal-entry.c
|
|
|
14f8ab |
@@ -246,6 +246,19 @@ afr_selfheal_detect_gfid_and_type_mismatch(xlator_t *this,
|
|
|
14f8ab |
if (replies[i].op_ret != 0)
|
|
|
14f8ab |
continue;
|
|
|
14f8ab |
|
|
|
14f8ab |
+ if (gf_uuid_is_null(replies[i].poststat.ia_gfid))
|
|
|
14f8ab |
+ continue;
|
|
|
14f8ab |
+
|
|
|
14f8ab |
+ if (replies[i].poststat.ia_type == IA_INVAL)
|
|
|
14f8ab |
+ continue;
|
|
|
14f8ab |
+
|
|
|
14f8ab |
+ if (ia_type == IA_INVAL || gf_uuid_is_null(gfid)) {
|
|
|
14f8ab |
+ src_idx = i;
|
|
|
14f8ab |
+ ia_type = replies[src_idx].poststat.ia_type;
|
|
|
14f8ab |
+ gfid = &replies[src_idx].poststat.ia_gfid;
|
|
|
14f8ab |
+ continue;
|
|
|
14f8ab |
+ }
|
|
|
14f8ab |
+
|
|
|
14f8ab |
if (gf_uuid_compare(gfid, replies[i].poststat.ia_gfid) &&
|
|
|
14f8ab |
(ia_type == replies[i].poststat.ia_type)) {
|
|
|
14f8ab |
ret = afr_gfid_split_brain_source(this, replies, inode, pargfid,
|
|
|
14f8ab |
--
|
|
|
14f8ab |
1.8.3.1
|
|
|
14f8ab |
|