e3c68b
From babbd49cc053993a4ecff8eaf178d5a29f3a0bf0 Mon Sep 17 00:00:00 2001
e3c68b
From: karthik-us <ksubrahm@redhat.com>
e3c68b
Date: Wed, 20 Nov 2019 12:26:11 +0530
e3c68b
Subject: [PATCH 329/335] cluster/afr: Heal entries when there is a source & no
e3c68b
 healed_sinks
e3c68b
e3c68b
Backport of: https://review.gluster.org/#/c/glusterfs/+/23364/
e3c68b
e3c68b
Problem:
e3c68b
In a situation where B1 blames B2, B2 blames B1 and B3 doesn't blame
e3c68b
anything for entry heal, heal will not complete even though we have a
e3c68b
clear source and sinks. This happens because while doing
e3c68b
afr_selfheal_find_direction() only the bricks which are blamed by
e3c68b
non-accused bricks are considered as sinks. Later in
e3c68b
__afr_selfheal_entry_finalize_source() when it tries to mark all the
e3c68b
non-sources as sinks it fails to do so because there won't be any
e3c68b
healed_sinks marked, no witness present and there will be a source.
e3c68b
e3c68b
Fix:
e3c68b
If there is a source and no healed_sinks, then on every locked brick reset
e3c68b
sources to 0 and set healed_sinks to 1 to trigger a conservative merge.
e3c68b
e3c68b
Change-Id: I8831603ac037b6a3000bee092abfdcc92f7f2e57
e3c68b
Signed-off-by: karthik-us <ksubrahm@redhat.com>
e3c68b
BUG: 1764095
e3c68b
Reviewed-on: https://code.engineering.redhat.com/gerrit/185834
e3c68b
Tested-by: RHGS Build Bot <nigelb@redhat.com>
e3c68b
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
e3c68b
---
e3c68b
 .../bug-1749322-entry-heal-not-happening.t         | 89 ++++++++++++++++++++++
e3c68b
 xlators/cluster/afr/src/afr-self-heal-entry.c      | 15 ++++
e3c68b
 2 files changed, 104 insertions(+)
e3c68b
 create mode 100644 tests/bugs/replicate/bug-1749322-entry-heal-not-happening.t
e3c68b
e3c68b
diff --git a/tests/bugs/replicate/bug-1749322-entry-heal-not-happening.t b/tests/bugs/replicate/bug-1749322-entry-heal-not-happening.t
e3c68b
new file mode 100644
e3c68b
index 0000000..9627908
e3c68b
--- /dev/null
e3c68b
+++ b/tests/bugs/replicate/bug-1749322-entry-heal-not-happening.t
e3c68b
@@ -0,0 +1,89 @@
e3c68b
+#!/bin/bash
e3c68b
+
e3c68b
+. $(dirname $0)/../../include.rc
e3c68b
+. $(dirname $0)/../../volume.rc
e3c68b
+. $(dirname $0)/../../afr.rc
e3c68b
+
e3c68b
+cleanup
e3c68b
+
e3c68b
+function check_gfid_and_link_count
e3c68b
+{
e3c68b
+        local file=$1
e3c68b
+
e3c68b
+        file_gfid_b0=$(gf_get_gfid_xattr $B0/${V0}0/$file)
e3c68b
+        TEST [ ! -z $file_gfid_b0 ]
e3c68b
+        file_gfid_b1=$(gf_get_gfid_xattr $B0/${V0}1/$file)
e3c68b
+        file_gfid_b2=$(gf_get_gfid_xattr $B0/${V0}2/$file)
e3c68b
+        EXPECT $file_gfid_b0 echo $file_gfid_b1
e3c68b
+        EXPECT $file_gfid_b0 echo $file_gfid_b2
e3c68b
+
e3c68b
+        EXPECT "2" stat -c %h $B0/${V0}0/$file
e3c68b
+        EXPECT "2" stat -c %h $B0/${V0}1/$file
e3c68b
+        EXPECT "2" stat -c %h $B0/${V0}2/$file
e3c68b
+}
e3c68b
+TESTS_EXPECTED_IN_LOOP=18
e3c68b
+
e3c68b
+################################################################################
e3c68b
+## Start and create a volume
e3c68b
+TEST glusterd;
e3c68b
+TEST pidof glusterd;
e3c68b
+TEST $CLI volume info;
e3c68b
+
e3c68b
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2};
e3c68b
+TEST $CLI volume start $V0;
e3c68b
+TEST $CLI volume set $V0 cluster.heal-timeout 5
e3c68b
+TEST $CLI volume heal $V0 disable
e3c68b
+EXPECT 'Started' volinfo_field $V0 'Status';
e3c68b
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
e3c68b
+
e3c68b
+TEST mkdir $M0/dir
e3c68b
+TEST `echo "File 1 " > $M0/dir/file1`
e3c68b
+TEST touch $M0/dir/file{2..4}
e3c68b
+
e3c68b
+# Remove file2 from 1st & 3rd bricks
e3c68b
+TEST rm -f $B0/$V0"0"/dir/file2
e3c68b
+TEST rm -f $B0/$V0"2"/dir/file2
e3c68b
+
e3c68b
+# Remove file3 and the .glusterfs hardlink from 1st & 2nd bricks
e3c68b
+gfid_file3=$(gf_get_gfid_xattr $B0/$V0"0"/dir/file3)
e3c68b
+gfid_str_file3=$(gf_gfid_xattr_to_str $gfid_file3)
e3c68b
+TEST rm $B0/$V0"0"/.glusterfs/${gfid_str_file3:0:2}/${gfid_str_file3:2:2}/$gfid_str_file3
e3c68b
+TEST rm $B0/$V0"1"/.glusterfs/${gfid_str_file3:0:2}/${gfid_str_file3:2:2}/$gfid_str_file3
e3c68b
+TEST rm -f $B0/$V0"0"/dir/file3
e3c68b
+TEST rm -f $B0/$V0"1"/dir/file3
e3c68b
+
e3c68b
+# Remove the .glusterfs hardlink and the gfid xattr of file4 on 3rd brick
e3c68b
+gfid_file4=$(gf_get_gfid_xattr $B0/$V0"0"/dir/file4)
e3c68b
+gfid_str_file4=$(gf_gfid_xattr_to_str $gfid_file4)
e3c68b
+TEST rm $B0/$V0"2"/.glusterfs/${gfid_str_file4:0:2}/${gfid_str_file4:2:2}/$gfid_str_file4
e3c68b
+TEST setfattr -x trusted.gfid $B0/$V0"2"/dir/file4
e3c68b
+
e3c68b
+# 1st & 3rd bricks blame each other for entry heal on dir
e3c68b
+setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000000000001 $B0/$V0"2"/dir
e3c68b
+setfattr -n trusted.afr.$V0-client-2 -v 0x000000000000000000000001 $B0/$V0"0"/dir
e3c68b
+
e3c68b
+# Add entry to xattrop dir on first brick.
e3c68b
+xattrop_dir0=$(afr_get_index_path $B0/$V0"0")
e3c68b
+base_entry_b0=`ls $xattrop_dir0`
e3c68b
+gfid_str=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/$V0"0"/dir/))
e3c68b
+TEST ln $xattrop_dir0/$base_entry_b0 $xattrop_dir0/$gfid_str
e3c68b
+
e3c68b
+EXPECT "^1$" get_pending_heal_count $V0
e3c68b
+
e3c68b
+# Launch heal
e3c68b
+TEST $CLI volume heal $V0 enable
e3c68b
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^Y$" glustershd_up_status
e3c68b
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 0
e3c68b
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 1
e3c68b
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 2
e3c68b
+TEST $CLI volume heal $V0
e3c68b
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
e3c68b
+
e3c68b
+# All the files must be present on all the bricks after conservative merge and
e3c68b
+# should have the gfid xattr and the .glusterfs hardlink.
e3c68b
+check_gfid_and_link_count dir/file1
e3c68b
+check_gfid_and_link_count dir/file2
e3c68b
+check_gfid_and_link_count dir/file3
e3c68b
+check_gfid_and_link_count dir/file4
e3c68b
+
e3c68b
+cleanup
e3c68b
diff --git a/xlators/cluster/afr/src/afr-self-heal-entry.c b/xlators/cluster/afr/src/afr-self-heal-entry.c
e3c68b
index 35b600f..3ce882e 100644
e3c68b
--- a/xlators/cluster/afr/src/afr-self-heal-entry.c
e3c68b
+++ b/xlators/cluster/afr/src/afr-self-heal-entry.c
e3c68b
@@ -479,6 +479,7 @@ __afr_selfheal_entry_finalize_source(xlator_t *this, unsigned char *sources,
e3c68b
     afr_private_t *priv = NULL;
e3c68b
     int source = -1;
e3c68b
     int sources_count = 0;
e3c68b
+    int i = 0;
e3c68b
 
e3c68b
     priv = this->private;
e3c68b
 
e3c68b
@@ -492,6 +493,20 @@ __afr_selfheal_entry_finalize_source(xlator_t *this, unsigned char *sources,
e3c68b
     }
e3c68b
 
e3c68b
     source = afr_choose_source_by_policy(priv, sources, AFR_ENTRY_TRANSACTION);
e3c68b
+
e3c68b
+    /* If a source was selected but no brick is marked as a healed sink,
e3c68b
+     * mark every locked brick as a sink to trigger conservative merge.
e3c68b
+     */
e3c68b
+    if (source != -1 && !AFR_COUNT(healed_sinks, priv->child_count)) {
e3c68b
+        for (i = 0; i < priv->child_count; i++) {
e3c68b
+            if (locked_on[i]) {
e3c68b
+                sources[i] = 0;
e3c68b
+                healed_sinks[i] = 1;
e3c68b
+            }
e3c68b
+        }
e3c68b
+        return -1;
e3c68b
+    }
e3c68b
+
e3c68b
     return source;
e3c68b
 }
e3c68b
 
e3c68b
-- 
e3c68b
1.8.3.1
e3c68b