9f5ccc
From babbd49cc053993a4ecff8eaf178d5a29f3a0bf0 Mon Sep 17 00:00:00 2001
9f5ccc
From: karthik-us <ksubrahm@redhat.com>
9f5ccc
Date: Wed, 20 Nov 2019 12:26:11 +0530
9f5ccc
Subject: [PATCH 329/335] cluster/afr: Heal entries when there is a source & no
9f5ccc
 healed_sinks
9f5ccc
9f5ccc
Backport of: https://review.gluster.org/#/c/glusterfs/+/23364/
9f5ccc
9f5ccc
Problem:
9f5ccc
In a situation where B1 blames B2, B2 blames B1 and B3 doesn't blame
9f5ccc
anything for entry heal, heal will not complete even though we have
9f5ccc
a clear source and sinks. This will happen because while doing
9f5ccc
afr_selfheal_find_direction() only the bricks which are blamed by
9f5ccc
non-accused bricks are considered as sinks. Later in
9f5ccc
__afr_selfheal_entry_finalize_source() when it tries to mark all the
9f5ccc
non-sources as sinks it fails to do so because there won't be any
9f5ccc
healed_sinks marked, no witness present and there will be a source.
9f5ccc
9f5ccc
Fix:
9f5ccc
If there is a source and no healed_sinks, then reset all the locked
9f5ccc
sources to 0 and healed_sinks to 1 to trigger a conservative merge.
9f5ccc
9f5ccc
Change-Id: I8831603ac037b6a3000bee092abfdcc92f7f2e57
9f5ccc
Signed-off-by: karthik-us <ksubrahm@redhat.com>
9f5ccc
BUG: 1764095
9f5ccc
Reviewed-on: https://code.engineering.redhat.com/gerrit/185834
9f5ccc
Tested-by: RHGS Build Bot <nigelb@redhat.com>
9f5ccc
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
9f5ccc
---
9f5ccc
 .../bug-1749322-entry-heal-not-happening.t         | 89 ++++++++++++++++++++++
9f5ccc
 xlators/cluster/afr/src/afr-self-heal-entry.c      | 15 ++++
9f5ccc
 2 files changed, 104 insertions(+)
9f5ccc
 create mode 100644 tests/bugs/replicate/bug-1749322-entry-heal-not-happening.t
9f5ccc
9f5ccc
diff --git a/tests/bugs/replicate/bug-1749322-entry-heal-not-happening.t b/tests/bugs/replicate/bug-1749322-entry-heal-not-happening.t
9f5ccc
new file mode 100644
9f5ccc
index 0000000..9627908
9f5ccc
--- /dev/null
9f5ccc
+++ b/tests/bugs/replicate/bug-1749322-entry-heal-not-happening.t
9f5ccc
@@ -0,0 +1,89 @@
9f5ccc
+#!/bin/bash
9f5ccc
+
9f5ccc
+. $(dirname $0)/../../include.rc
9f5ccc
+. $(dirname $0)/../../volume.rc
9f5ccc
+. $(dirname $0)/../../afr.rc
9f5ccc
+
9f5ccc
+cleanup
9f5ccc
+
9f5ccc
+function check_gfid_and_link_count
9f5ccc
+{
9f5ccc
+        local file=$1
9f5ccc
+        # The gfid xattr must be set on brick 0 and match on bricks 1 and 2.
9f5ccc
+        file_gfid_b0=$(gf_get_gfid_xattr $B0/${V0}0/$file)
9f5ccc
+        TEST [ ! -z $file_gfid_b0 ]
9f5ccc
+        file_gfid_b1=$(gf_get_gfid_xattr $B0/${V0}1/$file)
9f5ccc
+        file_gfid_b2=$(gf_get_gfid_xattr $B0/${V0}2/$file)
9f5ccc
+        EXPECT $file_gfid_b0 echo $file_gfid_b1
9f5ccc
+        EXPECT $file_gfid_b0 echo $file_gfid_b2
9f5ccc
+        # Link count 2 on each brick: the entry plus its .glusterfs hardlink.
9f5ccc
+        EXPECT "2" stat -c %h $B0/${V0}0/$file
9f5ccc
+        EXPECT "2" stat -c %h $B0/${V0}1/$file
9f5ccc
+        EXPECT "2" stat -c %h $B0/${V0}2/$file
9f5ccc
+}
9f5ccc
+TESTS_EXPECTED_IN_LOOP=18
9f5ccc
+
9f5ccc
+################################################################################
9f5ccc
+## Start and create a volume
9f5ccc
+TEST glusterd;
9f5ccc
+TEST pidof glusterd;
9f5ccc
+TEST $CLI volume info;
9f5ccc
+
9f5ccc
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2};
9f5ccc
+TEST $CLI volume start $V0;
9f5ccc
+TEST $CLI volume set $V0 cluster.heal-timeout 5
9f5ccc
+TEST $CLI volume heal $V0 disable
9f5ccc
+EXPECT 'Started' volinfo_field $V0 'Status';
9f5ccc
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
9f5ccc
+
9f5ccc
+TEST mkdir $M0/dir
9f5ccc
+TEST `echo "File 1 " > $M0/dir/file1`
9f5ccc
+TEST touch $M0/dir/file{2..4}
9f5ccc
+
9f5ccc
+# Remove file2 from 1st & 3rd bricks
9f5ccc
+TEST rm -f $B0/$V0"0"/dir/file2
9f5ccc
+TEST rm -f $B0/$V0"2"/dir/file2
9f5ccc
+
9f5ccc
+# Remove file3 and the .glusterfs hardlink from 1st & 2nd bricks
9f5ccc
+gfid_file3=$(gf_get_gfid_xattr $B0/$V0"0"/dir/file3)
9f5ccc
+gfid_str_file3=$(gf_gfid_xattr_to_str $gfid_file3)
9f5ccc
+TEST rm $B0/$V0"0"/.glusterfs/${gfid_str_file3:0:2}/${gfid_str_file3:2:2}/$gfid_str_file3
9f5ccc
+TEST rm $B0/$V0"1"/.glusterfs/${gfid_str_file3:0:2}/${gfid_str_file3:2:2}/$gfid_str_file3
9f5ccc
+TEST rm -f $B0/$V0"0"/dir/file3
9f5ccc
+TEST rm -f $B0/$V0"1"/dir/file3
9f5ccc
+
9f5ccc
+# Remove the .glusterfs hardlink and the gfid xattr of file4 on 3rd brick
9f5ccc
+gfid_file4=$(gf_get_gfid_xattr $B0/$V0"0"/dir/file4)
9f5ccc
+gfid_str_file4=$(gf_gfid_xattr_to_str $gfid_file4)
9f5ccc
+TEST rm $B0/$V0"2"/.glusterfs/${gfid_str_file4:0:2}/${gfid_str_file4:2:2}/$gfid_str_file4
9f5ccc
+TEST setfattr -x trusted.gfid $B0/$V0"2"/dir/file4
9f5ccc
+
9f5ccc
+# B0 and B2 blame each other
9f5ccc
+setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000000000001 $B0/$V0"2"/dir
9f5ccc
+setfattr -n trusted.afr.$V0-client-2 -v 0x000000000000000000000001 $B0/$V0"0"/dir
9f5ccc
+
9f5ccc
+# Add entry to xattrop dir on first brick.
9f5ccc
+xattrop_dir0=$(afr_get_index_path $B0/$V0"0")
9f5ccc
+base_entry_b0=`ls $xattrop_dir0`
9f5ccc
+gfid_str=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/$V0"0"/dir/))
9f5ccc
+TEST ln $xattrop_dir0/$base_entry_b0 $xattrop_dir0/$gfid_str
9f5ccc
+
9f5ccc
+EXPECT "^1$" get_pending_heal_count $V0
9f5ccc
+
9f5ccc
+# Launch heal
9f5ccc
+TEST $CLI volume heal $V0 enable
9f5ccc
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^Y$" glustershd_up_status
9f5ccc
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 0
9f5ccc
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 1
9f5ccc
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 2
9f5ccc
+TEST $CLI volume heal $V0
9f5ccc
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
9f5ccc
+
9f5ccc
+# All the files must be present on all the bricks after conservative merge and
9f5ccc
+# should have the gfid xattr and the .glusterfs hardlink.
9f5ccc
+check_gfid_and_link_count dir/file1
9f5ccc
+check_gfid_and_link_count dir/file2
9f5ccc
+check_gfid_and_link_count dir/file3
9f5ccc
+check_gfid_and_link_count dir/file4
9f5ccc
+
9f5ccc
+cleanup
9f5ccc
diff --git a/xlators/cluster/afr/src/afr-self-heal-entry.c b/xlators/cluster/afr/src/afr-self-heal-entry.c
9f5ccc
index 35b600f..3ce882e 100644
9f5ccc
--- a/xlators/cluster/afr/src/afr-self-heal-entry.c
9f5ccc
+++ b/xlators/cluster/afr/src/afr-self-heal-entry.c
9f5ccc
@@ -479,6 +479,7 @@ __afr_selfheal_entry_finalize_source(xlator_t *this, unsigned char *sources,
9f5ccc
     afr_private_t *priv = NULL;
9f5ccc
     int source = -1;
9f5ccc
     int sources_count = 0;
9f5ccc
+    int i = 0;
9f5ccc
 
9f5ccc
     priv = this->private;
9f5ccc
 
9f5ccc
@@ -492,6 +493,20 @@ __afr_selfheal_entry_finalize_source(xlator_t *this, unsigned char *sources,
9f5ccc
     }
9f5ccc
 
9f5ccc
     source = afr_choose_source_by_policy(priv, sources, AFR_ENTRY_TRANSACTION);
9f5ccc
+
9f5ccc
+    /* If a source was chosen but no healed_sinks are marked, then mark
9f5ccc
+     * everything as sink to trigger conservative merge.
9f5ccc
+     */
9f5ccc
+    if (source != -1 && !AFR_COUNT(healed_sinks, priv->child_count)) {
9f5ccc
+        for (i = 0; i < priv->child_count; i++) {
9f5ccc
+            if (locked_on[i]) {
9f5ccc
+                sources[i] = 0;
9f5ccc
+                healed_sinks[i] = 1;
9f5ccc
+            }
9f5ccc
+        }
9f5ccc
+        return -1;
9f5ccc
+    }
9f5ccc
+
9f5ccc
     return source;
9f5ccc
 }
9f5ccc
 
9f5ccc
-- 
9f5ccc
1.8.3.1
9f5ccc