14f8ab
From babbd49cc053993a4ecff8eaf178d5a29f3a0bf0 Mon Sep 17 00:00:00 2001
14f8ab
From: karthik-us <ksubrahm@redhat.com>
14f8ab
Date: Wed, 20 Nov 2019 12:26:11 +0530
14f8ab
Subject: [PATCH 329/335] cluster/afr: Heal entries when there is a source & no
14f8ab
 healed_sinks
14f8ab
14f8ab
Backport of: https://review.gluster.org/#/c/glusterfs/+/23364/
14f8ab
14f8ab
Problem:
14f8ab
In a situation where B1 blames B2, B2 blames B1 and B3 doesn't blame
14f8ab
anything for entry heal, heal will not complete even though we have a
14f8ab
clear source and sinks. This happens because, while doing
14f8ab
afr_selfheal_find_direction(), only the bricks which are blamed by
14f8ab
non-accused bricks are considered as sinks. Later, in
14f8ab
__afr_selfheal_entry_finalize_source(), when it tries to mark all the
14f8ab
non-sources as sinks, it fails to do so because there are no
14f8ab
healed_sinks marked, no witness is present, and there is a source.
14f8ab
14f8ab
Fix:
14f8ab
If there is a source and no healed_sinks, then for every locked brick
14f8ab
reset sources to 0 and set healed_sinks to 1 to do a conservative merge.
14f8ab
14f8ab
Change-Id: I8831603ac037b6a3000bee092abfdcc92f7f2e57
14f8ab
Signed-off-by: karthik-us <ksubrahm@redhat.com>
14f8ab
BUG: 1764095
14f8ab
Reviewed-on: https://code.engineering.redhat.com/gerrit/185834
14f8ab
Tested-by: RHGS Build Bot <nigelb@redhat.com>
14f8ab
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
14f8ab
---
14f8ab
 .../bug-1749322-entry-heal-not-happening.t         | 89 ++++++++++++++++++++++
14f8ab
 xlators/cluster/afr/src/afr-self-heal-entry.c      | 15 ++++
14f8ab
 2 files changed, 104 insertions(+)
14f8ab
 create mode 100644 tests/bugs/replicate/bug-1749322-entry-heal-not-happening.t
14f8ab
14f8ab
diff --git a/tests/bugs/replicate/bug-1749322-entry-heal-not-happening.t b/tests/bugs/replicate/bug-1749322-entry-heal-not-happening.t
14f8ab
new file mode 100644
14f8ab
index 0000000..9627908
14f8ab
--- /dev/null
14f8ab
+++ b/tests/bugs/replicate/bug-1749322-entry-heal-not-happening.t
14f8ab
@@ -0,0 +1,89 @@
14f8ab
+#!/bin/bash
14f8ab
+
14f8ab
+. $(dirname $0)/../../include.rc
14f8ab
+. $(dirname $0)/../../volume.rc
14f8ab
+. $(dirname $0)/../../afr.rc
14f8ab
+
14f8ab
+cleanup
14f8ab
+
14f8ab
+function check_gfid_and_link_count
14f8ab
+{
14f8ab
+        local file=$1
14f8ab
+
14f8ab
+        file_gfid_b0=$(gf_get_gfid_xattr $B0/${V0}0/$file)
14f8ab
+        TEST [ ! -z $file_gfid_b0 ]
14f8ab
+        file_gfid_b1=$(gf_get_gfid_xattr $B0/${V0}1/$file)
14f8ab
+        file_gfid_b2=$(gf_get_gfid_xattr $B0/${V0}2/$file)
14f8ab
+        EXPECT $file_gfid_b0 echo $file_gfid_b1
14f8ab
+        EXPECT $file_gfid_b0 echo $file_gfid_b2
14f8ab
+
14f8ab
+        EXPECT "2" stat -c %h $B0/${V0}0/$file
14f8ab
+        EXPECT "2" stat -c %h $B0/${V0}1/$file
14f8ab
+        EXPECT "2" stat -c %h $B0/${V0}2/$file
14f8ab
+}
14f8ab
+TESTS_EXPECTED_IN_LOOP=18
14f8ab
+
14f8ab
+################################################################################
14f8ab
+## Start and create a volume
14f8ab
+TEST glusterd;
14f8ab
+TEST pidof glusterd;
14f8ab
+TEST $CLI volume info;
14f8ab
+
14f8ab
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2};
14f8ab
+TEST $CLI volume start $V0;
14f8ab
+TEST $CLI volume set $V0 cluster.heal-timeout 5
14f8ab
+TEST $CLI volume heal $V0 disable
14f8ab
+EXPECT 'Started' volinfo_field $V0 'Status';
14f8ab
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
14f8ab
+
14f8ab
+TEST mkdir $M0/dir
14f8ab
+TEST `echo "File 1 " > $M0/dir/file1`
14f8ab
+TEST touch $M0/dir/file{2..4}
14f8ab
+
14f8ab
+# Remove file2 from 1st & 3rd bricks
14f8ab
+TEST rm -f $B0/$V0"0"/dir/file2
14f8ab
+TEST rm -f $B0/$V0"2"/dir/file2
14f8ab
+
14f8ab
+# Remove file3 and the .glusterfs hardlink from 1st & 2nd bricks
14f8ab
+gfid_file3=$(gf_get_gfid_xattr $B0/$V0"0"/dir/file3)
14f8ab
+gfid_str_file3=$(gf_gfid_xattr_to_str $gfid_file3)
14f8ab
+TEST rm $B0/$V0"0"/.glusterfs/${gfid_str_file3:0:2}/${gfid_str_file3:2:2}/$gfid_str_file3
14f8ab
+TEST rm $B0/$V0"1"/.glusterfs/${gfid_str_file3:0:2}/${gfid_str_file3:2:2}/$gfid_str_file3
14f8ab
+TEST rm -f $B0/$V0"0"/dir/file3
14f8ab
+TEST rm -f $B0/$V0"1"/dir/file3
14f8ab
+
14f8ab
+# Remove the .glusterfs hardlink and the gfid xattr of file4 on 3rd brick
14f8ab
+gfid_file4=$(gf_get_gfid_xattr $B0/$V0"0"/dir/file4)
14f8ab
+gfid_str_file4=$(gf_gfid_xattr_to_str $gfid_file4)
14f8ab
+TEST rm $B0/$V0"2"/.glusterfs/${gfid_str_file4:0:2}/${gfid_str_file4:2:2}/$gfid_str_file4
14f8ab
+TEST setfattr -x trusted.gfid $B0/$V0"2"/dir/file4
14f8ab
+
14f8ab
+# B0 and B2 blame each other
14f8ab
+setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000000000001 $B0/$V0"2"/dir
14f8ab
+setfattr -n trusted.afr.$V0-client-2 -v 0x000000000000000000000001 $B0/$V0"0"/dir
14f8ab
+
14f8ab
+# Add entry to xattrop dir on first brick.
14f8ab
+xattrop_dir0=$(afr_get_index_path $B0/$V0"0")
14f8ab
+base_entry_b0=`ls $xattrop_dir0`
14f8ab
+gfid_str=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/$V0"0"/dir/))
14f8ab
+TEST ln $xattrop_dir0/$base_entry_b0 $xattrop_dir0/$gfid_str
14f8ab
+
14f8ab
+EXPECT "^1$" get_pending_heal_count $V0
14f8ab
+
14f8ab
+# Launch heal
14f8ab
+TEST $CLI volume heal $V0 enable
14f8ab
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^Y$" glustershd_up_status
14f8ab
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 0
14f8ab
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 1
14f8ab
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 2
14f8ab
+TEST $CLI volume heal $V0
14f8ab
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
14f8ab
+
14f8ab
+# All the files must be present on all the bricks after conservative merge and
14f8ab
+# should have the gfid xattr and the .glusterfs hardlink.
14f8ab
+check_gfid_and_link_count dir/file1
14f8ab
+check_gfid_and_link_count dir/file2
14f8ab
+check_gfid_and_link_count dir/file3
14f8ab
+check_gfid_and_link_count dir/file4
14f8ab
+
14f8ab
+cleanup
14f8ab
diff --git a/xlators/cluster/afr/src/afr-self-heal-entry.c b/xlators/cluster/afr/src/afr-self-heal-entry.c
14f8ab
index 35b600f..3ce882e 100644
14f8ab
--- a/xlators/cluster/afr/src/afr-self-heal-entry.c
14f8ab
+++ b/xlators/cluster/afr/src/afr-self-heal-entry.c
14f8ab
@@ -479,6 +479,7 @@ __afr_selfheal_entry_finalize_source(xlator_t *this, unsigned char *sources,
14f8ab
     afr_private_t *priv = NULL;
14f8ab
     int source = -1;
14f8ab
     int sources_count = 0;
14f8ab
+    int i = 0;
14f8ab
 
14f8ab
     priv = this->private;
14f8ab
 
14f8ab
@@ -492,6 +493,20 @@ __afr_selfheal_entry_finalize_source(xlator_t *this, unsigned char *sources,
14f8ab
     }
14f8ab
 
14f8ab
     source = afr_choose_source_by_policy(priv, sources, AFR_ENTRY_TRANSACTION);
14f8ab
+
14f8ab
+    /* If a source was chosen but no brick is marked as a healed sink, mark
14f8ab
+     * every locked brick as a sink to trigger conservative merge.
14f8ab
+     */
14f8ab
+    if (source != -1 && !AFR_COUNT(healed_sinks, priv->child_count)) {
14f8ab
+        for (i = 0; i < priv->child_count; i++) {
14f8ab
+            if (locked_on[i]) {
14f8ab
+                sources[i] = 0;
14f8ab
+                healed_sinks[i] = 1;
14f8ab
+            }
14f8ab
+        }
14f8ab
+        return -1;
14f8ab
+    }
14f8ab
+
14f8ab
     return source;
14f8ab
 }
14f8ab
 
14f8ab
-- 
14f8ab
1.8.3.1
14f8ab