74b1de
From babbd49cc053993a4ecff8eaf178d5a29f3a0bf0 Mon Sep 17 00:00:00 2001
74b1de
From: karthik-us <ksubrahm@redhat.com>
74b1de
Date: Wed, 20 Nov 2019 12:26:11 +0530
74b1de
Subject: [PATCH 329/335] cluster/afr: Heal entries when there is a source & no
74b1de
 healed_sinks
74b1de
74b1de
Backport of: https://review.gluster.org/#/c/glusterfs/+/23364/
74b1de
74b1de
Problem:
74b1de
In a situation where B1 blames B2, B2 blames B1 and B3 doesn't blame
74b1de
anything for entry heal, heal will not complete even though we have
74b1de
a clear source and sinks. This happens because, while doing
74b1de
afr_selfheal_find_direction(), only the bricks which are blamed by
74b1de
non-accused bricks are considered as sinks. Later in
74b1de
__afr_selfheal_entry_finalize_source() when it tries to mark all the
74b1de
non-sources as sinks it fails to do so because there won't be any
74b1de
healed_sinks marked, no witness present and there will be a source.
74b1de
74b1de
Fix:
74b1de
If there is a source and no healed_sinks, then for every locked brick
74b1de
reset sources to 0 and healed_sinks to 1 to do a conservative merge.
74b1de
74b1de
Change-Id: I8831603ac037b6a3000bee092abfdcc92f7f2e57
74b1de
Signed-off-by: karthik-us <ksubrahm@redhat.com>
74b1de
BUG: 1764095
74b1de
Reviewed-on: https://code.engineering.redhat.com/gerrit/185834
74b1de
Tested-by: RHGS Build Bot <nigelb@redhat.com>
74b1de
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
74b1de
---
74b1de
 .../bug-1749322-entry-heal-not-happening.t         | 89 ++++++++++++++++++++++
74b1de
 xlators/cluster/afr/src/afr-self-heal-entry.c      | 15 ++++
74b1de
 2 files changed, 104 insertions(+)
74b1de
 create mode 100644 tests/bugs/replicate/bug-1749322-entry-heal-not-happening.t
74b1de
74b1de
diff --git a/tests/bugs/replicate/bug-1749322-entry-heal-not-happening.t b/tests/bugs/replicate/bug-1749322-entry-heal-not-happening.t
74b1de
new file mode 100644
74b1de
index 0000000..9627908
74b1de
--- /dev/null
74b1de
+++ b/tests/bugs/replicate/bug-1749322-entry-heal-not-happening.t
74b1de
@@ -0,0 +1,89 @@
74b1de
+#!/bin/bash
74b1de
+
74b1de
+. $(dirname $0)/../../include.rc
74b1de
+. $(dirname $0)/../../volume.rc
74b1de
+. $(dirname $0)/../../afr.rc
74b1de
+
74b1de
+cleanup
74b1de
+
74b1de
+function check_gfid_and_link_count
74b1de
+{
74b1de
+        local file=$1
74b1de
+
74b1de
+        file_gfid_b0=$(gf_get_gfid_xattr $B0/${V0}0/$file)
74b1de
+        TEST [ ! -z $file_gfid_b0 ]
74b1de
+        file_gfid_b1=$(gf_get_gfid_xattr $B0/${V0}1/$file)
74b1de
+        file_gfid_b2=$(gf_get_gfid_xattr $B0/${V0}2/$file)
74b1de
+        EXPECT $file_gfid_b0 echo $file_gfid_b1
74b1de
+        EXPECT $file_gfid_b0 echo $file_gfid_b2
74b1de
+
74b1de
+        EXPECT "2" stat -c %h $B0/${V0}0/$file
74b1de
+        EXPECT "2" stat -c %h $B0/${V0}1/$file
74b1de
+        EXPECT "2" stat -c %h $B0/${V0}2/$file
74b1de
+}
74b1de
+TESTS_EXPECTED_IN_LOOP=18
74b1de
+
74b1de
+################################################################################
74b1de
+## Start and create a volume
74b1de
+TEST glusterd;
74b1de
+TEST pidof glusterd;
74b1de
+TEST $CLI volume info;
74b1de
+
74b1de
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2};
74b1de
+TEST $CLI volume start $V0;
74b1de
+TEST $CLI volume set $V0 cluster.heal-timeout 5
74b1de
+TEST $CLI volume heal $V0 disable
74b1de
+EXPECT 'Started' volinfo_field $V0 'Status';
74b1de
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
74b1de
+
74b1de
+TEST mkdir $M0/dir
74b1de
+TEST `echo "File 1 " > $M0/dir/file1`
74b1de
+TEST touch $M0/dir/file{2..4}
74b1de
+
74b1de
+# Remove file2 from 1st & 3rd bricks
74b1de
+TEST rm -f $B0/$V0"0"/dir/file2
74b1de
+TEST rm -f $B0/$V0"2"/dir/file2
74b1de
+
74b1de
+# Remove file3 and the .glusterfs hardlink from 1st & 2nd bricks
74b1de
+gfid_file3=$(gf_get_gfid_xattr $B0/$V0"0"/dir/file3)
74b1de
+gfid_str_file3=$(gf_gfid_xattr_to_str $gfid_file3)
74b1de
+TEST rm $B0/$V0"0"/.glusterfs/${gfid_str_file3:0:2}/${gfid_str_file3:2:2}/$gfid_str_file3
74b1de
+TEST rm $B0/$V0"1"/.glusterfs/${gfid_str_file3:0:2}/${gfid_str_file3:2:2}/$gfid_str_file3
74b1de
+TEST rm -f $B0/$V0"0"/dir/file3
74b1de
+TEST rm -f $B0/$V0"1"/dir/file3
74b1de
+
74b1de
+# Remove the .glusterfs hardlink and the gfid xattr of file4 on 3rd brick
74b1de
+gfid_file4=$(gf_get_gfid_xattr $B0/$V0"0"/dir/file4)
74b1de
+gfid_str_file4=$(gf_gfid_xattr_to_str $gfid_file4)
74b1de
+TEST rm $B0/$V0"2"/.glusterfs/${gfid_str_file4:0:2}/${gfid_str_file4:2:2}/$gfid_str_file4
74b1de
+TEST setfattr -x trusted.gfid $B0/$V0"2"/dir/file4
74b1de
+
74b1de
+# Bricks 0 and 2 blame each other for dir
74b1de
+setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000000000001 $B0/$V0"2"/dir
74b1de
+setfattr -n trusted.afr.$V0-client-2 -v 0x000000000000000000000001 $B0/$V0"0"/dir
74b1de
+
74b1de
+# Add entry to xattrop dir on first brick.
74b1de
+xattrop_dir0=$(afr_get_index_path $B0/$V0"0")
74b1de
+base_entry_b0=`ls $xattrop_dir0`
74b1de
+gfid_str=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/$V0"0"/dir/))
74b1de
+TEST ln $xattrop_dir0/$base_entry_b0 $xattrop_dir0/$gfid_str
74b1de
+
74b1de
+EXPECT "^1$" get_pending_heal_count $V0
74b1de
+
74b1de
+# Launch heal
74b1de
+TEST $CLI volume heal $V0 enable
74b1de
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^Y$" glustershd_up_status
74b1de
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 0
74b1de
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 1
74b1de
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 2
74b1de
+TEST $CLI volume heal $V0
74b1de
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
74b1de
+
74b1de
+# All the files must be present on all the bricks after conservative merge and
74b1de
+# should have the gfid xattr and the .glusterfs hardlink.
74b1de
+check_gfid_and_link_count dir/file1
74b1de
+check_gfid_and_link_count dir/file2
74b1de
+check_gfid_and_link_count dir/file3
74b1de
+check_gfid_and_link_count dir/file4
74b1de
+
74b1de
+cleanup
74b1de
diff --git a/xlators/cluster/afr/src/afr-self-heal-entry.c b/xlators/cluster/afr/src/afr-self-heal-entry.c
74b1de
index 35b600f..3ce882e 100644
74b1de
--- a/xlators/cluster/afr/src/afr-self-heal-entry.c
74b1de
+++ b/xlators/cluster/afr/src/afr-self-heal-entry.c
74b1de
@@ -479,6 +479,7 @@ __afr_selfheal_entry_finalize_source(xlator_t *this, unsigned char *sources,
74b1de
     afr_private_t *priv = NULL;
74b1de
     int source = -1;
74b1de
     int sources_count = 0;
74b1de
+    int i = 0;
74b1de
 
74b1de
     priv = this->private;
74b1de
 
74b1de
@@ -492,6 +493,20 @@ __afr_selfheal_entry_finalize_source(xlator_t *this, unsigned char *sources,
74b1de
     }
74b1de
 
74b1de
     source = afr_choose_source_by_policy(priv, sources, AFR_ENTRY_TRANSACTION);
74b1de
+
74b1de
+    /* If the selected source does not blame any other brick, then mark
74b1de
+     * everything as sink to trigger conservative merge.
74b1de
+     */
74b1de
+    if (source != -1 && !AFR_COUNT(healed_sinks, priv->child_count)) {
74b1de
+        for (i = 0; i < priv->child_count; i++) {
74b1de
+            if (locked_on[i]) {
74b1de
+                sources[i] = 0;
74b1de
+                healed_sinks[i] = 1;
74b1de
+            }
74b1de
+        }
74b1de
+        return -1;
74b1de
+    }
74b1de
+
74b1de
     return source;
74b1de
 }
74b1de
 
74b1de
-- 
74b1de
1.8.3.1
74b1de