Blob Blame History Raw
From babbd49cc053993a4ecff8eaf178d5a29f3a0bf0 Mon Sep 17 00:00:00 2001
From: karthik-us <ksubrahm@redhat.com>
Date: Wed, 20 Nov 2019 12:26:11 +0530
Subject: [PATCH 329/335] cluster/afr: Heal entries when there is a source & no
 healed_sinks

Backport of: https://review.gluster.org/#/c/glusterfs/+/23364/

Problem:
In a situation where B1 blames B2, B2 blames B1 and B3 doesn't blame
anything for entry heal, heal will not complete even though we have
clear source and sinks. This will happen because while doing
afr_selfheal_find_direction() only the bricks which are blamed by
non-accused bricks are considered as sinks. Later in
__afr_selfheal_entry_finalize_source() when it tries to mark all the
non-sources as sinks it fails to do so because there won't be any
healed_sinks marked, no witness present and there will be a source.

Fix:
If there is a source and no healed_sinks, then reset all the locked
sources to 0 and healed sinks to 1 to do conservative merge.

Change-Id: I8831603ac037b6a3000bee092abfdcc92f7f2e57
Signed-off-by: karthik-us <ksubrahm@redhat.com>
BUG: 1764095
Reviewed-on: https://code.engineering.redhat.com/gerrit/185834
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
---
 .../bug-1749322-entry-heal-not-happening.t         | 89 ++++++++++++++++++++++
 xlators/cluster/afr/src/afr-self-heal-entry.c      | 15 ++++
 2 files changed, 104 insertions(+)
 create mode 100644 tests/bugs/replicate/bug-1749322-entry-heal-not-happening.t

diff --git a/tests/bugs/replicate/bug-1749322-entry-heal-not-happening.t b/tests/bugs/replicate/bug-1749322-entry-heal-not-happening.t
new file mode 100644
index 0000000..9627908
--- /dev/null
+++ b/tests/bugs/replicate/bug-1749322-entry-heal-not-happening.t
@@ -0,0 +1,89 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../afr.rc
+
+cleanup
+
+function check_gfid_and_link_count
+{
+        local file=$1
+
+        file_gfid_b0=$(gf_get_gfid_xattr $B0/${V0}0/$file)
+        TEST [ ! -z $file_gfid_b0 ]
+        file_gfid_b1=$(gf_get_gfid_xattr $B0/${V0}1/$file)
+        file_gfid_b2=$(gf_get_gfid_xattr $B0/${V0}2/$file)
+        EXPECT $file_gfid_b0 echo $file_gfid_b1
+        EXPECT $file_gfid_b0 echo $file_gfid_b2
+
+        EXPECT "2" stat -c %h $B0/${V0}0/$file
+        EXPECT "2" stat -c %h $B0/${V0}1/$file
+        EXPECT "2" stat -c %h $B0/${V0}2/$file
+}
+TESTS_EXPECTED_IN_LOOP=18
+
+################################################################################
+## Start and create a volume
+TEST glusterd;
+TEST pidof glusterd;
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2};
+TEST $CLI volume start $V0;
+TEST $CLI volume set $V0 cluster.heal-timeout 5
+TEST $CLI volume heal $V0 disable
+EXPECT 'Started' volinfo_field $V0 'Status';
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
+
+TEST mkdir $M0/dir
+TEST `echo "File 1 " > $M0/dir/file1`
+TEST touch $M0/dir/file{2..4}
+
+# Remove file2 from 1st & 3rd bricks
+TEST rm -f $B0/$V0"0"/dir/file2
+TEST rm -f $B0/$V0"2"/dir/file2
+
+# Remove file3 and the .glusterfs hardlink from 1st & 2nd bricks
+gfid_file3=$(gf_get_gfid_xattr $B0/$V0"0"/dir/file3)
+gfid_str_file3=$(gf_gfid_xattr_to_str $gfid_file3)
+TEST rm $B0/$V0"0"/.glusterfs/${gfid_str_file3:0:2}/${gfid_str_file3:2:2}/$gfid_str_file3
+TEST rm $B0/$V0"1"/.glusterfs/${gfid_str_file3:0:2}/${gfid_str_file3:2:2}/$gfid_str_file3
+TEST rm -f $B0/$V0"0"/dir/file3
+TEST rm -f $B0/$V0"1"/dir/file3
+
+# Remove the .glusterfs hardlink and the gfid xattr of file4 on 3rd brick
+gfid_file4=$(gf_get_gfid_xattr $B0/$V0"0"/dir/file4)
+gfid_str_file4=$(gf_gfid_xattr_to_str $gfid_file4)
+TEST rm $B0/$V0"2"/.glusterfs/${gfid_str_file4:0:2}/${gfid_str_file4:2:2}/$gfid_str_file4
+TEST setfattr -x trusted.gfid $B0/$V0"2"/dir/file4
+
+# B0 and B2 blame each other
+setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000000000001 $B0/$V0"2"/dir
+setfattr -n trusted.afr.$V0-client-2 -v 0x000000000000000000000001 $B0/$V0"0"/dir
+
+# Add entry to xattrop dir on first brick.
+xattrop_dir0=$(afr_get_index_path $B0/$V0"0")
+base_entry_b0=`ls $xattrop_dir0`
+gfid_str=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/$V0"0"/dir/))
+TEST ln $xattrop_dir0/$base_entry_b0 $xattrop_dir0/$gfid_str
+
+EXPECT "^1$" get_pending_heal_count $V0
+
+# Launch heal
+TEST $CLI volume heal $V0 enable
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^Y$" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 2
+TEST $CLI volume heal $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+
+# All the files must be present on all the bricks after conservative merge and
+# should have the gfid xattr and the .glusterfs hardlink.
+check_gfid_and_link_count dir/file1
+check_gfid_and_link_count dir/file2
+check_gfid_and_link_count dir/file3
+check_gfid_and_link_count dir/file4
+
+cleanup
diff --git a/xlators/cluster/afr/src/afr-self-heal-entry.c b/xlators/cluster/afr/src/afr-self-heal-entry.c
index 35b600f..3ce882e 100644
--- a/xlators/cluster/afr/src/afr-self-heal-entry.c
+++ b/xlators/cluster/afr/src/afr-self-heal-entry.c
@@ -479,6 +479,7 @@ __afr_selfheal_entry_finalize_source(xlator_t *this, unsigned char *sources,
     afr_private_t *priv = NULL;
     int source = -1;
     int sources_count = 0;
+    int i = 0;
 
     priv = this->private;
 
@@ -492,6 +493,20 @@ __afr_selfheal_entry_finalize_source(xlator_t *this, unsigned char *sources,
     }
 
     source = afr_choose_source_by_policy(priv, sources, AFR_ENTRY_TRANSACTION);
+
+    /*If the selected source does not blame any other brick, then mark
+     * everything as sink to trigger conservative merge.
+     */
+    if (source != -1 && !AFR_COUNT(healed_sinks, priv->child_count)) {
+        for (i = 0; i < priv->child_count; i++) {
+            if (locked_on[i]) {
+                sources[i] = 0;
+                healed_sinks[i] = 1;
+            }
+        }
+        return -1;
+    }
+
     return source;
 }
 
-- 
1.8.3.1