|
|
ca3909 |
From babbd49cc053993a4ecff8eaf178d5a29f3a0bf0 Mon Sep 17 00:00:00 2001
|
|
|
ca3909 |
From: karthik-us <ksubrahm@redhat.com>
|
|
|
ca3909 |
Date: Wed, 20 Nov 2019 12:26:11 +0530
|
|
|
ca3909 |
Subject: [PATCH 329/335] cluster/afr: Heal entries when there is a source & no
|
|
|
ca3909 |
healed_sinks
|
|
|
ca3909 |
|
|
|
ca3909 |
Backport of: https://review.gluster.org/#/c/glusterfs/+/23364/
|
|
|
ca3909 |
|
|
|
ca3909 |
Problem:
|
|
|
ca3909 |
In a situation where B1 blames B2, B2 blames B1 and B3 doesn't blame
|
|
|
ca3909 |
anything for entry heal, heal will not complete even though we have
|
|
|
ca3909 |
a clear source and sinks. This happens because while doing
|
|
|
ca3909 |
afr_selfheal_find_direction() only the bricks which are blamed by
|
|
|
ca3909 |
non-accused bricks are considered as sinks. Later in
|
|
|
ca3909 |
__afr_selfheal_entry_finalize_source() when it tries to mark all the
|
|
|
ca3909 |
non-sources as sinks it fails to do so because there won't be any
|
|
|
ca3909 |
healed_sinks marked, no witness present and there will be a source.
|
|
|
ca3909 |
|
|
|
ca3909 |
Fix:
|
|
|
ca3909 |
If there is a source and no healed_sinks, then for every locked brick
|
|
|
ca3909 |
reset sources to 0 and healed_sinks to 1 to do a conservative merge.
|
|
|
ca3909 |
|
|
|
ca3909 |
Change-Id: I8831603ac037b6a3000bee092abfdcc92f7f2e57
|
|
|
ca3909 |
Signed-off-by: karthik-us <ksubrahm@redhat.com>
|
|
|
ca3909 |
BUG: 1764095
|
|
|
ca3909 |
Reviewed-on: https://code.engineering.redhat.com/gerrit/185834
|
|
|
ca3909 |
Tested-by: RHGS Build Bot <nigelb@redhat.com>
|
|
|
ca3909 |
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
|
|
|
ca3909 |
---
|
|
|
ca3909 |
.../bug-1749322-entry-heal-not-happening.t | 89 ++++++++++++++++++++++
|
|
|
ca3909 |
xlators/cluster/afr/src/afr-self-heal-entry.c | 15 ++++
|
|
|
ca3909 |
2 files changed, 104 insertions(+)
|
|
|
ca3909 |
create mode 100644 tests/bugs/replicate/bug-1749322-entry-heal-not-happening.t
|
|
|
ca3909 |
|
|
|
ca3909 |
diff --git a/tests/bugs/replicate/bug-1749322-entry-heal-not-happening.t b/tests/bugs/replicate/bug-1749322-entry-heal-not-happening.t
|
|
|
ca3909 |
new file mode 100644
|
|
|
ca3909 |
index 0000000..9627908
|
|
|
ca3909 |
--- /dev/null
|
|
|
ca3909 |
+++ b/tests/bugs/replicate/bug-1749322-entry-heal-not-happening.t
|
|
|
ca3909 |
@@ -0,0 +1,89 @@
|
|
|
ca3909 |
+#!/bin/bash
|
|
|
ca3909 |
+
|
|
|
ca3909 |
+. $(dirname $0)/../../include.rc
|
|
|
ca3909 |
+. $(dirname $0)/../../volume.rc
|
|
|
ca3909 |
+. $(dirname $0)/../../afr.rc
|
|
|
ca3909 |
+
|
|
|
ca3909 |
+cleanup
|
|
|
ca3909 |
+
|
|
|
ca3909 |
+function check_gfid_and_link_count
|
|
|
ca3909 |
+{
|
|
|
ca3909 |
+ local file=$1
|
|
|
ca3909 |
+
|
|
|
ca3909 |
+ file_gfid_b0=$(gf_get_gfid_xattr $B0/${V0}0/$file)
|
|
|
ca3909 |
+ TEST [ ! -z $file_gfid_b0 ]
|
|
|
ca3909 |
+ file_gfid_b1=$(gf_get_gfid_xattr $B0/${V0}1/$file)
|
|
|
ca3909 |
+ file_gfid_b2=$(gf_get_gfid_xattr $B0/${V0}2/$file)
|
|
|
ca3909 |
+ EXPECT $file_gfid_b0 echo $file_gfid_b1
|
|
|
ca3909 |
+ EXPECT $file_gfid_b0 echo $file_gfid_b2
|
|
|
ca3909 |
+
|
|
|
ca3909 |
+ EXPECT "2" stat -c %h $B0/${V0}0/$file
|
|
|
ca3909 |
+ EXPECT "2" stat -c %h $B0/${V0}1/$file
|
|
|
ca3909 |
+ EXPECT "2" stat -c %h $B0/${V0}2/$file
|
|
|
ca3909 |
+}
|
|
|
ca3909 |
+TESTS_EXPECTED_IN_LOOP=18
|
|
|
ca3909 |
+
|
|
|
ca3909 |
+################################################################################
|
|
|
ca3909 |
+## Start and create a volume
|
|
|
ca3909 |
+TEST glusterd;
|
|
|
ca3909 |
+TEST pidof glusterd;
|
|
|
ca3909 |
+TEST $CLI volume info;
|
|
|
ca3909 |
+
|
|
|
ca3909 |
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2};
|
|
|
ca3909 |
+TEST $CLI volume start $V0;
|
|
|
ca3909 |
+TEST $CLI volume set $V0 cluster.heal-timeout 5
|
|
|
ca3909 |
+TEST $CLI volume heal $V0 disable
|
|
|
ca3909 |
+EXPECT 'Started' volinfo_field $V0 'Status';
|
|
|
ca3909 |
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
|
|
|
ca3909 |
+
|
|
|
ca3909 |
+TEST mkdir $M0/dir
|
|
|
ca3909 |
+TEST `echo "File 1 " > $M0/dir/file1`
|
|
|
ca3909 |
+TEST touch $M0/dir/file{2..4}
|
|
|
ca3909 |
+
|
|
|
ca3909 |
+# Remove file2 from 1st & 3rd bricks
|
|
|
ca3909 |
+TEST rm -f $B0/$V0"0"/dir/file2
|
|
|
ca3909 |
+TEST rm -f $B0/$V0"2"/dir/file2
|
|
|
ca3909 |
+
|
|
|
ca3909 |
+# Remove file3 and the .glusterfs hardlink from 1st & 2nd bricks
|
|
|
ca3909 |
+gfid_file3=$(gf_get_gfid_xattr $B0/$V0"0"/dir/file3)
|
|
|
ca3909 |
+gfid_str_file3=$(gf_gfid_xattr_to_str $gfid_file3)
|
|
|
ca3909 |
+TEST rm $B0/$V0"0"/.glusterfs/${gfid_str_file3:0:2}/${gfid_str_file3:2:2}/$gfid_str_file3
|
|
|
ca3909 |
+TEST rm $B0/$V0"1"/.glusterfs/${gfid_str_file3:0:2}/${gfid_str_file3:2:2}/$gfid_str_file3
|
|
|
ca3909 |
+TEST rm -f $B0/$V0"0"/dir/file3
|
|
|
ca3909 |
+TEST rm -f $B0/$V0"1"/dir/file3
|
|
|
ca3909 |
+
|
|
|
ca3909 |
+# Remove the .glusterfs hardlink and the gfid xattr of file4 on 3rd brick
|
|
|
ca3909 |
+gfid_file4=$(gf_get_gfid_xattr $B0/$V0"0"/dir/file4)
|
|
|
ca3909 |
+gfid_str_file4=$(gf_gfid_xattr_to_str $gfid_file4)
|
|
|
ca3909 |
+TEST rm $B0/$V0"2"/.glusterfs/${gfid_str_file4:0:2}/${gfid_str_file4:2:2}/$gfid_str_file4
|
|
|
ca3909 |
+TEST setfattr -x trusted.gfid $B0/$V0"2"/dir/file4
|
|
|
ca3909 |
+
|
|
|
ca3909 |
+# B0 and B2 blame each other
|
|
|
ca3909 |
+setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000000000001 $B0/$V0"2"/dir
|
|
|
ca3909 |
+setfattr -n trusted.afr.$V0-client-2 -v 0x000000000000000000000001 $B0/$V0"0"/dir
|
|
|
ca3909 |
+
|
|
|
ca3909 |
+# Add entry to xattrop dir on first brick.
|
|
|
ca3909 |
+xattrop_dir0=$(afr_get_index_path $B0/$V0"0")
|
|
|
ca3909 |
+base_entry_b0=`ls $xattrop_dir0`
|
|
|
ca3909 |
+gfid_str=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/$V0"0"/dir/))
|
|
|
ca3909 |
+TEST ln $xattrop_dir0/$base_entry_b0 $xattrop_dir0/$gfid_str
|
|
|
ca3909 |
+
|
|
|
ca3909 |
+EXPECT "^1$" get_pending_heal_count $V0
|
|
|
ca3909 |
+
|
|
|
ca3909 |
+# Launch heal
|
|
|
ca3909 |
+TEST $CLI volume heal $V0 enable
|
|
|
ca3909 |
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^Y$" glustershd_up_status
|
|
|
ca3909 |
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 0
|
|
|
ca3909 |
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 1
|
|
|
ca3909 |
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 2
|
|
|
ca3909 |
+TEST $CLI volume heal $V0
|
|
|
ca3909 |
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
|
|
|
ca3909 |
+
|
|
|
ca3909 |
+# All the files must be present on all the bricks after conservative merge and
|
|
|
ca3909 |
+# should have the gfid xattr and the .glusterfs hardlink.
|
|
|
ca3909 |
+check_gfid_and_link_count dir/file1
|
|
|
ca3909 |
+check_gfid_and_link_count dir/file2
|
|
|
ca3909 |
+check_gfid_and_link_count dir/file3
|
|
|
ca3909 |
+check_gfid_and_link_count dir/file4
|
|
|
ca3909 |
+
|
|
|
ca3909 |
+cleanup
|
|
|
ca3909 |
diff --git a/xlators/cluster/afr/src/afr-self-heal-entry.c b/xlators/cluster/afr/src/afr-self-heal-entry.c
|
|
|
ca3909 |
index 35b600f..3ce882e 100644
|
|
|
ca3909 |
--- a/xlators/cluster/afr/src/afr-self-heal-entry.c
|
|
|
ca3909 |
+++ b/xlators/cluster/afr/src/afr-self-heal-entry.c
|
|
|
ca3909 |
@@ -479,6 +479,7 @@ __afr_selfheal_entry_finalize_source(xlator_t *this, unsigned char *sources,
|
|
|
ca3909 |
afr_private_t *priv = NULL;
|
|
|
ca3909 |
int source = -1;
|
|
|
ca3909 |
int sources_count = 0;
|
|
|
ca3909 |
+ int i = 0;
|
|
|
ca3909 |
|
|
|
ca3909 |
priv = this->private;
|
|
|
ca3909 |
|
|
|
ca3909 |
@@ -492,6 +493,20 @@ __afr_selfheal_entry_finalize_source(xlator_t *this, unsigned char *sources,
|
|
|
ca3909 |
}
|
|
|
ca3909 |
|
|
|
ca3909 |
source = afr_choose_source_by_policy(priv, sources, AFR_ENTRY_TRANSACTION);
|
|
|
ca3909 |
+
|
|
|
ca3909 |
+ /* If a source was chosen but no healed_sinks are marked, reset every
|
|
|
ca3909 |
+ * locked brick to a sink (and non-source) to trigger conservative merge.
|
|
|
ca3909 |
+ */
|
|
|
ca3909 |
+ if (source != -1 && !AFR_COUNT(healed_sinks, priv->child_count)) {
|
|
|
ca3909 |
+ for (i = 0; i < priv->child_count; i++) {
|
|
|
ca3909 |
+ if (locked_on[i]) {
|
|
|
ca3909 |
+ sources[i] = 0;
|
|
|
ca3909 |
+ healed_sinks[i] = 1;
|
|
|
ca3909 |
+ }
|
|
|
ca3909 |
+ }
|
|
|
ca3909 |
+ return -1;
|
|
|
ca3909 |
+ }
|
|
|
ca3909 |
+
|
|
|
ca3909 |
return source;
|
|
|
ca3909 |
}
|
|
|
ca3909 |
|
|
|
ca3909 |
--
|
|
|
ca3909 |
1.8.3.1
|
|
|
ca3909 |
|