17b94a
From babbd49cc053993a4ecff8eaf178d5a29f3a0bf0 Mon Sep 17 00:00:00 2001
17b94a
From: karthik-us <ksubrahm@redhat.com>
17b94a
Date: Wed, 20 Nov 2019 12:26:11 +0530
17b94a
Subject: [PATCH 329/335] cluster/afr: Heal entries when there is a source & no
17b94a
 healed_sinks
17b94a
17b94a
Backport of: https://review.gluster.org/#/c/glusterfs/+/23364/
17b94a
17b94a
Problem:
17b94a
In a situation where B1 blames B2, B2 blames B1 and B3 doesn't blame
17b94a
anything for entry heal, heal will not complete even though we have
17b94a
a clear source and sinks. This happens because, while running
17b94a
afr_selfheal_find_direction() only the bricks which are blamed by
17b94a
non-accused bricks are considered as sinks. Later in
17b94a
__afr_selfheal_entry_finalize_source() when it tries to mark all the
17b94a
non-sources as sinks it fails to do so because there won't be any
17b94a
healed_sinks marked, no witness present and there will be a source.
17b94a
17b94a
Fix:
17b94a
If there is a source and no healed_sinks, then for every locked brick
17b94a
reset sources to 0 and healed_sinks to 1, to do a conservative merge.
17b94a
17b94a
Change-Id: I8831603ac037b6a3000bee092abfdcc92f7f2e57
17b94a
Signed-off-by: karthik-us <ksubrahm@redhat.com>
17b94a
BUG: 1764095
17b94a
Reviewed-on: https://code.engineering.redhat.com/gerrit/185834
17b94a
Tested-by: RHGS Build Bot <nigelb@redhat.com>
17b94a
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
17b94a
---
17b94a
 .../bug-1749322-entry-heal-not-happening.t         | 89 ++++++++++++++++++++++
17b94a
 xlators/cluster/afr/src/afr-self-heal-entry.c      | 15 ++++
17b94a
 2 files changed, 104 insertions(+)
17b94a
 create mode 100644 tests/bugs/replicate/bug-1749322-entry-heal-not-happening.t
17b94a
17b94a
diff --git a/tests/bugs/replicate/bug-1749322-entry-heal-not-happening.t b/tests/bugs/replicate/bug-1749322-entry-heal-not-happening.t
17b94a
new file mode 100644
17b94a
index 0000000..9627908
17b94a
--- /dev/null
17b94a
+++ b/tests/bugs/replicate/bug-1749322-entry-heal-not-happening.t
17b94a
@@ -0,0 +1,89 @@
17b94a
+#!/bin/bash
17b94a
+
17b94a
+. $(dirname $0)/../../include.rc
17b94a
+. $(dirname $0)/../../volume.rc
17b94a
+. $(dirname $0)/../../afr.rc
17b94a
+
17b94a
+cleanup
17b94a
+
17b94a
+function check_gfid_and_link_count
17b94a
+{
17b94a
+        local file=$1
17b94a
+
17b94a
+        file_gfid_b0=$(gf_get_gfid_xattr $B0/${V0}0/$file)
17b94a
+        TEST [ ! -z $file_gfid_b0 ]
17b94a
+        file_gfid_b1=$(gf_get_gfid_xattr $B0/${V0}1/$file)
17b94a
+        file_gfid_b2=$(gf_get_gfid_xattr $B0/${V0}2/$file)
17b94a
+        EXPECT $file_gfid_b0 echo $file_gfid_b1
17b94a
+        EXPECT $file_gfid_b0 echo $file_gfid_b2
17b94a
+
17b94a
+        EXPECT "2" stat -c %h $B0/${V0}0/$file
17b94a
+        EXPECT "2" stat -c %h $B0/${V0}1/$file
17b94a
+        EXPECT "2" stat -c %h $B0/${V0}2/$file
17b94a
+}
17b94a
+TESTS_EXPECTED_IN_LOOP=18
17b94a
+
17b94a
+################################################################################
17b94a
+## Start and create a volume
17b94a
+TEST glusterd;
17b94a
+TEST pidof glusterd;
17b94a
+TEST $CLI volume info;
17b94a
+
17b94a
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2};
17b94a
+TEST $CLI volume start $V0;
17b94a
+TEST $CLI volume set $V0 cluster.heal-timeout 5
17b94a
+TEST $CLI volume heal $V0 disable
17b94a
+EXPECT 'Started' volinfo_field $V0 'Status';
17b94a
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
17b94a
+
17b94a
+TEST mkdir $M0/dir
17b94a
+TEST `echo "File 1 " > $M0/dir/file1`
17b94a
+TEST touch $M0/dir/file{2..4}
17b94a
+
17b94a
+# Remove file2 from 1st & 3rd bricks
17b94a
+TEST rm -f $B0/$V0"0"/dir/file2
17b94a
+TEST rm -f $B0/$V0"2"/dir/file2
17b94a
+
17b94a
+# Remove file3 and the .glusterfs hardlink from 1st & 2nd bricks
17b94a
+gfid_file3=$(gf_get_gfid_xattr $B0/$V0"0"/dir/file3)
17b94a
+gfid_str_file3=$(gf_gfid_xattr_to_str $gfid_file3)
17b94a
+TEST rm $B0/$V0"0"/.glusterfs/${gfid_str_file3:0:2}/${gfid_str_file3:2:2}/$gfid_str_file3
17b94a
+TEST rm $B0/$V0"1"/.glusterfs/${gfid_str_file3:0:2}/${gfid_str_file3:2:2}/$gfid_str_file3
17b94a
+TEST rm -f $B0/$V0"0"/dir/file3
17b94a
+TEST rm -f $B0/$V0"1"/dir/file3
17b94a
+
17b94a
+# Remove the .glusterfs hardlink and the gfid xattr of file4 on 3rd brick
17b94a
+gfid_file4=$(gf_get_gfid_xattr $B0/$V0"0"/dir/file4)
17b94a
+gfid_str_file4=$(gf_gfid_xattr_to_str $gfid_file4)
17b94a
+TEST rm $B0/$V0"2"/.glusterfs/${gfid_str_file4:0:2}/${gfid_str_file4:2:2}/$gfid_str_file4
17b94a
+TEST setfattr -x trusted.gfid $B0/$V0"2"/dir/file4
17b94a
+
17b94a
+# B0 and B2 blame each other
17b94a
+setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000000000001 $B0/$V0"2"/dir
17b94a
+setfattr -n trusted.afr.$V0-client-2 -v 0x000000000000000000000001 $B0/$V0"0"/dir
17b94a
+
17b94a
+# Add entry to xattrop dir on first brick.
17b94a
+xattrop_dir0=$(afr_get_index_path $B0/$V0"0")
17b94a
+base_entry_b0=`ls $xattrop_dir0`
17b94a
+gfid_str=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/$V0"0"/dir/))
17b94a
+TEST ln $xattrop_dir0/$base_entry_b0 $xattrop_dir0/$gfid_str
17b94a
+
17b94a
+EXPECT "^1$" get_pending_heal_count $V0
17b94a
+
17b94a
+# Launch heal
17b94a
+TEST $CLI volume heal $V0 enable
17b94a
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^Y$" glustershd_up_status
17b94a
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 0
17b94a
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 1
17b94a
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 2
17b94a
+TEST $CLI volume heal $V0
17b94a
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
17b94a
+
17b94a
+# All the files must be present on all the bricks after conservative merge and
17b94a
+# should have the gfid xattr and the .glusterfs hardlink.
17b94a
+check_gfid_and_link_count dir/file1
17b94a
+check_gfid_and_link_count dir/file2
17b94a
+check_gfid_and_link_count dir/file3
17b94a
+check_gfid_and_link_count dir/file4
17b94a
+
17b94a
+cleanup
17b94a
diff --git a/xlators/cluster/afr/src/afr-self-heal-entry.c b/xlators/cluster/afr/src/afr-self-heal-entry.c
17b94a
index 35b600f..3ce882e 100644
17b94a
--- a/xlators/cluster/afr/src/afr-self-heal-entry.c
17b94a
+++ b/xlators/cluster/afr/src/afr-self-heal-entry.c
17b94a
@@ -479,6 +479,7 @@ __afr_selfheal_entry_finalize_source(xlator_t *this, unsigned char *sources,
17b94a
     afr_private_t *priv = NULL;
17b94a
     int source = -1;
17b94a
     int sources_count = 0;
17b94a
+    int i = 0;
17b94a
 
17b94a
     priv = this->private;
17b94a
 
17b94a
@@ -492,6 +493,20 @@ __afr_selfheal_entry_finalize_source(xlator_t *this, unsigned char *sources,
17b94a
     }
17b94a
 
17b94a
     source = afr_choose_source_by_policy(priv, sources, AFR_ENTRY_TRANSACTION);
17b94a
+
17b94a
+    /* If a source was chosen but no brick is marked in healed_sinks, mark
17b94a
+     * every locked brick as a sink to trigger conservative merge.
17b94a
+     */
17b94a
+    if (source != -1 && !AFR_COUNT(healed_sinks, priv->child_count)) {
17b94a
+        for (i = 0; i < priv->child_count; i++) {
17b94a
+            if (locked_on[i]) {
17b94a
+                sources[i] = 0;
17b94a
+                healed_sinks[i] = 1;
17b94a
+            }
17b94a
+        }
17b94a
+        return -1;
17b94a
+    }
17b94a
+
17b94a
     return source;
17b94a
 }
17b94a
 
17b94a
-- 
17b94a
1.8.3.1
17b94a