1df6c8
From babbd49cc053993a4ecff8eaf178d5a29f3a0bf0 Mon Sep 17 00:00:00 2001
1df6c8
From: karthik-us <ksubrahm@redhat.com>
1df6c8
Date: Wed, 20 Nov 2019 12:26:11 +0530
1df6c8
Subject: [PATCH 329/335] cluster/afr: Heal entries when there is a source & no
1df6c8
 healed_sinks
1df6c8
1df6c8
Backport of: https://review.gluster.org/#/c/glusterfs/+/23364/
1df6c8
1df6c8
Problem:
1df6c8
In a situation where B1 blames B2, B2 blames B1 and B3 doesn't blame
1df6c8
anything for entry heal, heal will not complete even though we have
1df6c8
clear source and sinks. This will happen because while doing
1df6c8
afr_selfheal_find_direction() only the bricks which are blamed by
1df6c8
non-accused bricks are considered as sinks. Later in
1df6c8
__afr_selfheal_entry_finalize_source() when it tries to mark all the
1df6c8
non-sources as sinks it fails to do so because there won't be any
1df6c8
healed_sinks marked, no witness present and there will be a source.
1df6c8
1df6c8
Fix:
1df6c8
If there is a source and no healed_sinks, then on every locked brick reset
1df6c8
sources to 0 and healed_sinks to 1 to trigger a conservative merge.
1df6c8
1df6c8
Change-Id: I8831603ac037b6a3000bee092abfdcc92f7f2e57
1df6c8
Signed-off-by: karthik-us <ksubrahm@redhat.com>
1df6c8
BUG: 1764095
1df6c8
Reviewed-on: https://code.engineering.redhat.com/gerrit/185834
1df6c8
Tested-by: RHGS Build Bot <nigelb@redhat.com>
1df6c8
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
1df6c8
---
1df6c8
 .../bug-1749322-entry-heal-not-happening.t         | 89 ++++++++++++++++++++++
1df6c8
 xlators/cluster/afr/src/afr-self-heal-entry.c      | 15 ++++
1df6c8
 2 files changed, 104 insertions(+)
1df6c8
 create mode 100644 tests/bugs/replicate/bug-1749322-entry-heal-not-happening.t
1df6c8
1df6c8
diff --git a/tests/bugs/replicate/bug-1749322-entry-heal-not-happening.t b/tests/bugs/replicate/bug-1749322-entry-heal-not-happening.t
1df6c8
new file mode 100644
1df6c8
index 0000000..9627908
1df6c8
--- /dev/null
1df6c8
+++ b/tests/bugs/replicate/bug-1749322-entry-heal-not-happening.t
1df6c8
@@ -0,0 +1,89 @@
1df6c8
+#!/bin/bash
1df6c8
+
1df6c8
+. $(dirname $0)/../../include.rc
1df6c8
+. $(dirname $0)/../../volume.rc
1df6c8
+. $(dirname $0)/../../afr.rc
1df6c8
+
1df6c8
+cleanup
1df6c8
+
1df6c8
+function check_gfid_and_link_count
1df6c8
+{
1df6c8
+        local file=$1
1df6c8
+
1df6c8
+        file_gfid_b0=$(gf_get_gfid_xattr $B0/${V0}0/$file)
1df6c8
+        TEST [ ! -z $file_gfid_b0 ]
1df6c8
+        file_gfid_b1=$(gf_get_gfid_xattr $B0/${V0}1/$file)
1df6c8
+        file_gfid_b2=$(gf_get_gfid_xattr $B0/${V0}2/$file)
1df6c8
+        EXPECT $file_gfid_b0 echo $file_gfid_b1
1df6c8
+        EXPECT $file_gfid_b0 echo $file_gfid_b2
1df6c8
+
1df6c8
+        EXPECT "2" stat -c %h $B0/${V0}0/$file
1df6c8
+        EXPECT "2" stat -c %h $B0/${V0}1/$file
1df6c8
+        EXPECT "2" stat -c %h $B0/${V0}2/$file
1df6c8
+}
1df6c8
+TESTS_EXPECTED_IN_LOOP=18
1df6c8
+
1df6c8
+################################################################################
1df6c8
+## Start and create a volume
1df6c8
+TEST glusterd;
1df6c8
+TEST pidof glusterd;
1df6c8
+TEST $CLI volume info;
1df6c8
+
1df6c8
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2};
1df6c8
+TEST $CLI volume start $V0;
1df6c8
+TEST $CLI volume set $V0 cluster.heal-timeout 5
1df6c8
+TEST $CLI volume heal $V0 disable
1df6c8
+EXPECT 'Started' volinfo_field $V0 'Status';
1df6c8
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
1df6c8
+
1df6c8
+TEST mkdir $M0/dir
1df6c8
+TEST `echo "File 1 " > $M0/dir/file1`
1df6c8
+TEST touch $M0/dir/file{2..4}
1df6c8
+
1df6c8
+# Remove file2 from 1st & 3rd bricks
1df6c8
+TEST rm -f $B0/$V0"0"/dir/file2
1df6c8
+TEST rm -f $B0/$V0"2"/dir/file2
1df6c8
+
1df6c8
+# Remove file3 and the .glusterfs hardlink from 1st & 2nd bricks
1df6c8
+gfid_file3=$(gf_get_gfid_xattr $B0/$V0"0"/dir/file3)
1df6c8
+gfid_str_file3=$(gf_gfid_xattr_to_str $gfid_file3)
1df6c8
+TEST rm $B0/$V0"0"/.glusterfs/${gfid_str_file3:0:2}/${gfid_str_file3:2:2}/$gfid_str_file3
1df6c8
+TEST rm $B0/$V0"1"/.glusterfs/${gfid_str_file3:0:2}/${gfid_str_file3:2:2}/$gfid_str_file3
1df6c8
+TEST rm -f $B0/$V0"0"/dir/file3
1df6c8
+TEST rm -f $B0/$V0"1"/dir/file3
1df6c8
+
1df6c8
+# Remove the .glusterfs hardlink and the gfid xattr of file4 on 3rd brick
1df6c8
+gfid_file4=$(gf_get_gfid_xattr $B0/$V0"0"/dir/file4)
1df6c8
+gfid_str_file4=$(gf_gfid_xattr_to_str $gfid_file4)
1df6c8
+TEST rm $B0/$V0"2"/.glusterfs/${gfid_str_file4:0:2}/${gfid_str_file4:2:2}/$gfid_str_file4
1df6c8
+TEST setfattr -x trusted.gfid $B0/$V0"2"/dir/file4
1df6c8
+
1df6c8
+# B0 and B2 blame each other
1df6c8
+setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000000000001 $B0/$V0"2"/dir
1df6c8
+setfattr -n trusted.afr.$V0-client-2 -v 0x000000000000000000000001 $B0/$V0"0"/dir
1df6c8
+
1df6c8
+# Add entry to xattrop dir on first brick.
1df6c8
+xattrop_dir0=$(afr_get_index_path $B0/$V0"0")
1df6c8
+base_entry_b0=`ls $xattrop_dir0`
1df6c8
+gfid_str=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/$V0"0"/dir/))
1df6c8
+TEST ln $xattrop_dir0/$base_entry_b0 $xattrop_dir0/$gfid_str
1df6c8
+
1df6c8
+EXPECT "^1$" get_pending_heal_count $V0
1df6c8
+
1df6c8
+# Launch heal
1df6c8
+TEST $CLI volume heal $V0 enable
1df6c8
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^Y$" glustershd_up_status
1df6c8
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 0
1df6c8
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 1
1df6c8
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 2
1df6c8
+TEST $CLI volume heal $V0
1df6c8
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
1df6c8
+
1df6c8
+# All the files must be present on all the bricks after conservative merge and
1df6c8
+# should have the gfid xattr and the .glusterfs hardlink.
1df6c8
+check_gfid_and_link_count dir/file1
1df6c8
+check_gfid_and_link_count dir/file2
1df6c8
+check_gfid_and_link_count dir/file3
1df6c8
+check_gfid_and_link_count dir/file4
1df6c8
+
1df6c8
+cleanup
1df6c8
diff --git a/xlators/cluster/afr/src/afr-self-heal-entry.c b/xlators/cluster/afr/src/afr-self-heal-entry.c
1df6c8
index 35b600f..3ce882e 100644
1df6c8
--- a/xlators/cluster/afr/src/afr-self-heal-entry.c
1df6c8
+++ b/xlators/cluster/afr/src/afr-self-heal-entry.c
1df6c8
@@ -479,6 +479,7 @@ __afr_selfheal_entry_finalize_source(xlator_t *this, unsigned char *sources,
1df6c8
     afr_private_t *priv = NULL;
1df6c8
     int source = -1;
1df6c8
     int sources_count = 0;
1df6c8
+    int i = 0;
1df6c8
 
1df6c8
     priv = this->private;
1df6c8
 
1df6c8
@@ -492,6 +493,20 @@ __afr_selfheal_entry_finalize_source(xlator_t *this, unsigned char *sources,
1df6c8
     }
1df6c8
 
1df6c8
     source = afr_choose_source_by_policy(priv, sources, AFR_ENTRY_TRANSACTION);
1df6c8
+
1df6c8
+    /* If a source was chosen but no brick is marked in healed_sinks, mark
1df6c8
+     * every locked brick as a sink to trigger conservative merge.
1df6c8
+     */
1df6c8
+    if (source != -1 && !AFR_COUNT(healed_sinks, priv->child_count)) {
1df6c8
+        for (i = 0; i < priv->child_count; i++) {
1df6c8
+            if (locked_on[i]) {
1df6c8
+                sources[i] = 0;
1df6c8
+                healed_sinks[i] = 1;
1df6c8
+            }
1df6c8
+        }
1df6c8
+        return -1;
1df6c8
+    }
1df6c8
+
1df6c8
     return source;
1df6c8
 }
1df6c8
 
1df6c8
-- 
1df6c8
1.8.3.1
1df6c8