74096c
From 0502383024cbf7e4776816e0a992dccc484a3cf2 Mon Sep 17 00:00:00 2001
74096c
From: Ravishankar N <ravishankar@redhat.com>
74096c
Date: Tue, 8 Dec 2020 17:23:22 +0530
74096c
Subject: [PATCH 488/511] glusterd/afr: enable granular-entry-heal by default
74096c
74096c
XXXXXXXXXXXXXXXXXXX
74096c
    IMPORTANT:
74096c
XXXXXXXXXXXXXXXXXXXX
74096c
I see that for rhgs-3.5.3, GD_OP_VERSION_MAX is GD_OP_VERSION_7_0. Since
74096c
this patch should only act on new volumes in rhgs-3.5.4, I am bumping
74096c
the op-version to GD_OP_VERSION_7_1. In glusterfs upstream, the patch
74096c
acts only if op-version >= GD_OP_VERSION_9_0 as seen in the commit
74096c
message below.
74096c
74096c
Upstream patch details:
74096c
/------------------------------------------------------------------------------/
74096c
1. The option has been enabled and tested for quite some time now in RHHI-V
74096c
downstream and I think it is safe to make it 'on' by default. Since it
74096c
is not possible to simply change it from 'off' to 'on' without breaking
74096c
rolling upgrades, old clients etc., I have made it default only for new volumes
74096c
starting from op-version GD_OP_VERSION_9_0.
74096c
74096c
Note: If you do a volume reset, the option will be turned back off.
74096c
This is okay as the dir's gfid will be captured in the 'xattrop' folder and heals
74096c
will proceed. There might be stale entries inside the 'entry-changes' folder,
74096c
which will be removed when we enable the option again.
74096c
74096c
2. I encountered a customer issue where entry heal was pending on a directory with
74096c
236436 files in it and the glustershd.log output was just stuck at
74096c
"performing entry selfheal", so I have added logs to give us
74096c
more info in DEBUG level about whether entry heal and data heal are
74096c
progressing (metadata heal doesn't take much time). That way, we have a
74096c
quick visual indication to say things are not 'stuck' if we briefly
74096c
enable debug logs, instead of taking statedumps or checking profile info
74096c
etc.
74096c
74096c
>Fixes: #1483
74096c
>Change-Id: I4f116f8c92f8cd33f209b758ff14f3c7e1981422
74096c
>Signed-off-by: Ravishankar N <ravishankar@redhat.com>
74096c
Upstream Patch: https://github.com/gluster/glusterfs/pull/1621
74096c
/------------------------------------------------------------------------------/
74096c
74096c
BUG: 1890506
74096c
Change-Id: If449a1e873633616cfc508d74b5c22eb434b55ae
74096c
Signed-off-by: Ravishankar N <ravishankar@redhat.com>
74096c
Reviewed-on: https://code.engineering.redhat.com/gerrit/220555
74096c
Tested-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
74096c
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
74096c
---
74096c
 libglusterfs/src/glusterfs/globals.h               |   4 +-
74096c
 libglusterfs/src/syncop-utils.c                    |   4 +-
74096c
 tests/basic/afr/add-brick-self-heal-non-granular.t |  75 +++++++++++++
74096c
 tests/basic/afr/add-brick-self-heal.t              |   4 +-
74096c
 tests/basic/afr/bug-1130892-non-granular.t         |  77 ++++++++++++++
74096c
 .../basic/afr/bug-1493415-gfid-heal-non-granular.t |  79 ++++++++++++++
74096c
 ...507-type-mismatch-error-handling-non-granular.t | 117 +++++++++++++++++++++
74096c
 ...1749322-entry-heal-not-happening-non-granular.t |  90 ++++++++++++++++
74096c
 .../afr/replace-brick-self-heal-non-granular.t     |  65 ++++++++++++
74096c
 tests/basic/afr/replace-brick-self-heal.t          |   2 +-
74096c
 tests/bugs/replicate/bug-1130892.t                 |   2 +-
74096c
 tests/bugs/replicate/bug-1493415-gfid-heal.t       |   2 +-
74096c
 .../bug-1722507-type-mismatch-error-handling.t     |  26 +++--
74096c
 .../bug-1749322-entry-heal-not-happening.t         |   7 +-
74096c
 xlators/cluster/afr/src/afr-self-heal-common.c     |   5 +
74096c
 xlators/cluster/afr/src/afr-self-heal-data.c       |   3 +
74096c
 xlators/cluster/afr/src/afr-self-heal-entry.c      |   7 +-
74096c
 xlators/mgmt/glusterd/src/glusterd-utils.c         |  13 +++
74096c
 18 files changed, 558 insertions(+), 24 deletions(-)
74096c
 create mode 100644 tests/basic/afr/add-brick-self-heal-non-granular.t
74096c
 create mode 100644 tests/basic/afr/bug-1130892-non-granular.t
74096c
 create mode 100644 tests/basic/afr/bug-1493415-gfid-heal-non-granular.t
74096c
 create mode 100644 tests/basic/afr/bug-1722507-type-mismatch-error-handling-non-granular.t
74096c
 create mode 100644 tests/basic/afr/bug-1749322-entry-heal-not-happening-non-granular.t
74096c
 create mode 100644 tests/basic/afr/replace-brick-self-heal-non-granular.t
74096c
74096c
diff --git a/libglusterfs/src/glusterfs/globals.h b/libglusterfs/src/glusterfs/globals.h
74096c
index 31717ed..cc145cd 100644
74096c
--- a/libglusterfs/src/glusterfs/globals.h
74096c
+++ b/libglusterfs/src/glusterfs/globals.h
74096c
@@ -50,7 +50,7 @@
74096c
     1 /* MIN is the fresh start op-version, mostly                             \
74096c
          should not change */
74096c
 #define GD_OP_VERSION_MAX                                                      \
74096c
-    GD_OP_VERSION_7_0 /* MAX VERSION is the maximum                            \
74096c
+    GD_OP_VERSION_7_1 /* MAX VERSION is the maximum                            \
74096c
                          count in VME table, should                            \
74096c
                          keep changing with                                    \
74096c
                          introduction of newer                                 \
74096c
@@ -138,6 +138,8 @@
74096c
 
74096c
 #define GD_OP_VERSION_7_0 70000 /* Op-version for GlusterFS 7.0 */
74096c
 
74096c
+#define GD_OP_VERSION_7_1 70100 /* Op-version for GlusterFS 7.1 */
74096c
+
74096c
 #include "glusterfs/xlator.h"
74096c
 #include "glusterfs/options.h"
74096c
 
74096c
diff --git a/libglusterfs/src/syncop-utils.c b/libglusterfs/src/syncop-utils.c
74096c
index be03527..2269c76 100644
74096c
--- a/libglusterfs/src/syncop-utils.c
74096c
+++ b/libglusterfs/src/syncop-utils.c
74096c
@@ -495,9 +495,7 @@ syncop_dir_scan(xlator_t *subvol, loc_t *loc, int pid, void *data,
74096c
             if (!strcmp(entry->d_name, ".") || !strcmp(entry->d_name, ".."))
74096c
                 continue;
74096c
 
74096c
-            ret = fn(subvol, entry, loc, data);
74096c
-            if (ret)
74096c
-                break;
74096c
+            ret |= fn(subvol, entry, loc, data);
74096c
         }
74096c
         gf_dirent_free(&entries);
74096c
         if (ret)
74096c
diff --git a/tests/basic/afr/add-brick-self-heal-non-granular.t b/tests/basic/afr/add-brick-self-heal-non-granular.t
74096c
new file mode 100644
74096c
index 0000000..19caf24
74096c
--- /dev/null
74096c
+++ b/tests/basic/afr/add-brick-self-heal-non-granular.t
74096c
@@ -0,0 +1,75 @@
74096c
+#!/bin/bash
74096c
+. $(dirname $0)/../../include.rc
74096c
+. $(dirname $0)/../../volume.rc
74096c
+cleanup;
74096c
+
74096c
+TEST glusterd
74096c
+TEST pidof glusterd
74096c
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
74096c
+EXPECT 'Created' volinfo_field $V0 'Status';
74096c
+TEST $CLI volume set $V0 cluster.granular-entry-heal off
74096c
+TEST $CLI volume start $V0
74096c
+EXPECT 'Started' volinfo_field $V0 'Status';
74096c
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0
74096c
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1
74096c
+
74096c
+TEST $CLI volume set $V0 cluster.data-self-heal off
74096c
+TEST $CLI volume set $V0 cluster.metadata-self-heal off
74096c
+TEST $CLI volume set $V0 cluster.entry-self-heal off
74096c
+TEST $CLI volume set $V0 cluster.heal-timeout 5
74096c
+
74096c
+TEST $CLI volume set $V0 self-heal-daemon off
74096c
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0;
74096c
+
74096c
+# Create files
74096c
+for i in {1..5}
74096c
+do
74096c
+        echo $i > $M0/file$i.txt
74096c
+done
74096c
+
74096c
+# Metadata changes
74096c
+TEST setfattr -n user.test -v qwerty $M0/file5.txt
74096c
+
74096c
+# Add brick1
74096c
+TEST $CLI volume add-brick $V0 replica 3 $H0:$B0/${V0}2
74096c
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}2
74096c
+
74096c
+# New-brick should accuse the old-bricks (Simulating case for data-loss)
74096c
+TEST setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000000000001 $B0/${V0}2/
74096c
+TEST setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000000000001 $B0/${V0}2/
74096c
+
74096c
+# Check if pending xattr and dirty-xattr are set for newly-added-brick
74096c
+EXPECT "000000000000000100000001" get_hex_xattr trusted.afr.$V0-client-2 $B0/${V0}0
74096c
+EXPECT "000000000000000100000001" get_hex_xattr trusted.afr.$V0-client-2 $B0/${V0}1
74096c
+EXPECT "000000000000000000000001" get_hex_xattr trusted.afr.dirty $B0/${V0}2
74096c
+
74096c
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
74096c
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
74096c
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 2
74096c
+
74096c
+TEST $CLI volume set $V0 self-heal-daemon on
74096c
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
74096c
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
74096c
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
74096c
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2
74096c
+TEST $CLI volume heal $V0
74096c
+
74096c
+# Wait for heal to complete
74096c
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
74096c
+
74096c
+# Check if entry-heal has happened
74096c
+TEST diff <(ls $B0/${V0}0 | sort) <(ls $B0/${V0}2 | sort)
74096c
+TEST diff <(ls $B0/${V0}1 | sort) <(ls $B0/${V0}2 | sort)
74096c
+
74096c
+# Test if data was healed
74096c
+TEST diff $B0/${V0}0/file1.txt $B0/${V0}2/file1.txt
74096c
+
74096c
+# Test if metadata was healed and exists on both the bricks
74096c
+EXPECT "qwerty" get_text_xattr user.test $B0/${V0}2/file5.txt
74096c
+EXPECT "qwerty" get_text_xattr user.test $B0/${V0}0/file5.txt
74096c
+
74096c
+EXPECT "000000000000000000000000" get_hex_xattr trusted.afr.$V0-client-2 $B0/${V0}0
74096c
+EXPECT "000000000000000000000000" get_hex_xattr trusted.afr.$V0-client-2 $B0/${V0}1
74096c
+EXPECT "000000000000000000000000" get_hex_xattr trusted.afr.dirty $B0/${V0}2
74096c
+
74096c
+cleanup;
74096c
diff --git a/tests/basic/afr/add-brick-self-heal.t b/tests/basic/afr/add-brick-self-heal.t
74096c
index c847e22..7ebf4f6 100644
74096c
--- a/tests/basic/afr/add-brick-self-heal.t
74096c
+++ b/tests/basic/afr/add-brick-self-heal.t
74096c
@@ -38,8 +38,8 @@ TEST setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000000000001 $B0/${V0
74096c
 TEST setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000000000001 $B0/${V0}2/
74096c
 
74096c
 # Check if pending xattr and dirty-xattr are set for newly-added-brick
74096c
-EXPECT "000000000000000100000001" get_hex_xattr trusted.afr.$V0-client-2 $B0/${V0}0
74096c
-EXPECT "000000000000000100000001" get_hex_xattr trusted.afr.$V0-client-2 $B0/${V0}1
74096c
+EXPECT "000000010000000100000001" get_hex_xattr trusted.afr.$V0-client-2 $B0/${V0}0
74096c
+EXPECT "000000010000000100000001" get_hex_xattr trusted.afr.$V0-client-2 $B0/${V0}1
74096c
 EXPECT "000000000000000000000001" get_hex_xattr trusted.afr.dirty $B0/${V0}2
74096c
 
74096c
 EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
74096c
diff --git a/tests/basic/afr/bug-1130892-non-granular.t b/tests/basic/afr/bug-1130892-non-granular.t
74096c
new file mode 100644
74096c
index 0000000..3cdbc7d
74096c
--- /dev/null
74096c
+++ b/tests/basic/afr/bug-1130892-non-granular.t
74096c
@@ -0,0 +1,77 @@
74096c
+#!/bin/bash
74096c
+. $(dirname $0)/../../include.rc
74096c
+. $(dirname $0)/../../volume.rc
74096c
+. $(dirname $0)/../../afr.rc
74096c
+
74096c
+cleanup;
74096c
+
74096c
+TEST glusterd
74096c
+TEST pidof glusterd
74096c
+TEST $CLI volume info;
74096c
+
74096c
+# Create a 1X2 replica
74096c
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}-{0,1}
74096c
+EXPECT 'Created' volinfo_field $V0 'Status';
74096c
+TEST $CLI volume set $V0 cluster.granular-entry-heal off
74096c
+
74096c
+# Disable self-heal daemon
74096c
+TEST gluster volume set $V0 self-heal-daemon off
74096c
+
74096c
+# Disable client-side heal
74096c
+TEST $CLI volume set $V0 cluster.data-self-heal off
74096c
+TEST $CLI volume set $V0 cluster.metadata-self-heal off
74096c
+TEST $CLI volume set $V0 cluster.entry-self-heal off
74096c
+
74096c
+# Disable all perf-xlators
74096c
+TEST $CLI volume set $V0 performance.quick-read off
74096c
+TEST $CLI volume set $V0 performance.io-cache off
74096c
+TEST $CLI volume set $V0 performance.write-behind off
74096c
+TEST $CLI volume set $V0 performance.stat-prefetch off
74096c
+TEST $CLI volume set $V0 performance.read-ahead off
74096c
+
74096c
+# Volume start
74096c
+TEST $CLI volume start $V0;
74096c
+EXPECT 'Started' volinfo_field $V0 'Status';
74096c
+
74096c
+# FUSE Mount
74096c
+TEST ${GFS} -s $H0 --volfile-id $V0 $M0
74096c
+
74096c
+# Create files and dirs
74096c
+TEST mkdir -p $M0/one/two/
74096c
+TEST `echo "Carpe diem" > $M0/one/two/three`
74096c
+
74096c
+# Simulate disk-replacement
74096c
+TEST kill_brick $V0 $H0 $B0/${V0}-1
74096c
+EXPECT_WITHIN ${PROCESS_DOWN_TIMEOUT} "^0$" afr_child_up_status $V0 1
74096c
+TEST rm -rf $B0/${V0}-1/one
74096c
+TEST rm -rf $B0/${V0}-1/.glusterfs
74096c
+
74096c
+#Ideally, disk replacement is done using reset-brick or replace-brick gluster CLI
74096c
+#which will create .glusterfs folder.
74096c
+mkdir $B0/${V0}-1/.glusterfs && chmod 600 $B0/${V0}-1/.glusterfs
74096c
+
74096c
+# Start force
74096c
+TEST $CLI volume start $V0 force
74096c
+
74096c
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1
74096c
+
74096c
+TEST stat $M0/one
74096c
+
74096c
+sleep 1
74096c
+
74096c
+# Check pending xattrs
74096c
+EXPECT "00000000" afr_get_specific_changelog_xattr $B0/${V0}-0/one trusted.afr.$V0-client-1 data
74096c
+EXPECT_NOT "00000000" afr_get_specific_changelog_xattr $B0/${V0}-0/one trusted.afr.$V0-client-1 entry
74096c
+EXPECT_NOT "00000000" afr_get_specific_changelog_xattr $B0/${V0}-0/one trusted.afr.$V0-client-1 metadata
74096c
+
74096c
+TEST gluster volume set $V0 self-heal-daemon on
74096c
+
74096c
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
74096c
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
74096c
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
74096c
+TEST $CLI volume heal $V0
74096c
+EXPECT_WITHIN $HEAL_TIMEOUT "Y" is_dir_heal_done $B0/${V0}-0 $B0/${V0}-1 one
74096c
+EXPECT_WITHIN $HEAL_TIMEOUT "Y" is_dir_heal_done $B0/${V0}-0 $B0/${V0}-1 one/two
74096c
+EXPECT_WITHIN $HEAL_TIMEOUT "Y" is_file_heal_done $B0/${V0}-0 $B0/${V0}-1 one/two/three
74096c
+
74096c
+cleanup;
74096c
diff --git a/tests/basic/afr/bug-1493415-gfid-heal-non-granular.t b/tests/basic/afr/bug-1493415-gfid-heal-non-granular.t
74096c
new file mode 100644
74096c
index 0000000..aff001c
74096c
--- /dev/null
74096c
+++ b/tests/basic/afr/bug-1493415-gfid-heal-non-granular.t
74096c
@@ -0,0 +1,79 @@
74096c
+#!/bin/bash
74096c
+. $(dirname $0)/../../include.rc
74096c
+. $(dirname $0)/../../volume.rc
74096c
+. $(dirname $0)/../../afr.rc
74096c
+cleanup;
74096c
+
74096c
+TEST glusterd
74096c
+TEST pidof glusterd
74096c
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
74096c
+TEST $CLI volume set $V0 cluster.granular-entry-heal off
74096c
+TEST $CLI volume start $V0
74096c
+
74096c
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 --attribute-timeout=0 --entry-timeout=0 $M0;
74096c
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
74096c
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1
74096c
+TEST $CLI volume set $V0 self-heal-daemon off
74096c
+
74096c
+# Create base entry in indices/xattrop
74096c
+echo "Data" > $M0/FILE
74096c
+
74096c
+#------------------------------------------------------------------------------#
74096c
+TEST touch $M0/f1
74096c
+gfid_f1=$(gf_get_gfid_xattr $B0/${V0}0/f1)
74096c
+gfid_str_f1=$(gf_gfid_xattr_to_str $gfid_f1)
74096c
+
74096c
+# Remove gfid xattr and .glusterfs hard link from 2nd brick. This simulates a
74096c
+# brick crash at the point where file got created but no xattrs were set.
74096c
+TEST setfattr -x trusted.gfid $B0/${V0}1/f1
74096c
+TEST rm $B0/${V0}1/.glusterfs/${gfid_str_f1:0:2}/${gfid_str_f1:2:2}/$gfid_str_f1
74096c
+
74096c
+# storage/posix considers that a file without gfid changed less than a second
74096c
+# before doesn't exist, so we need to wait for a second to force posix to
74096c
+# consider that this is a valid file but without gfid.
74096c
+sleep 2
74096c
+
74096c
+# Assume there were no pending xattrs on parent dir due to 1st brick crashing
74096c
+# too. Then name heal from client must heal the gfid.
74096c
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
74096c
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 --attribute-timeout=0 --entry-timeout=0 $M0;
74096c
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
74096c
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1
74096c
+TEST stat $M0/f1
74096c
+EXPECT "$gfid_f1" gf_get_gfid_xattr $B0/${V0}1/f1
74096c
+TEST stat $B0/${V0}1/.glusterfs/${gfid_str_f1:0:2}/${gfid_str_f1:2:2}/$gfid_str_f1
74096c
+
74096c
+#------------------------------------------------------------------------------#
74096c
+TEST mkdir $M0/dir
74096c
+TEST touch $M0/dir/f2
74096c
+gfid_f2=$(gf_get_gfid_xattr $B0/${V0}0/dir/f2)
74096c
+gfid_str_f2=$(gf_gfid_xattr_to_str $gfid_f2)
74096c
+
74096c
+# Remove gfid xattr and .glusterfs hard link from 2nd brick. This simulates a
74096c
+# brick crash at the point where file got created but no xattrs were set.
74096c
+TEST setfattr -x trusted.gfid $B0/${V0}1/dir/f2
74096c
+TEST rm $B0/${V0}1/.glusterfs/${gfid_str_f2:0:2}/${gfid_str_f2:2:2}/$gfid_str_f2
74096c
+
74096c
+#Now simulate setting of pending entry xattr on parent dir of 1st brick.
74096c
+TEST setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000000000001 $B0/${V0}0/dir
74096c
+create_brick_xattrop_entry $B0/${V0}0 dir
74096c
+
74096c
+# storage/posix considers that a file without gfid changed less than a second
74096c
+# before doesn't exist, so we need to wait for a second to force posix to
74096c
+# consider that this is a valid file but without gfid.
74096c
+sleep 2
74096c
+
74096c
+#Trigger entry-heal via shd
74096c
+TEST $CLI volume set $V0 self-heal-daemon on
74096c
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
74096c
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
74096c
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
74096c
+
74096c
+TEST $CLI volume heal $V0
74096c
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
74096c
+
74096c
+EXPECT "$gfid_f2" gf_get_gfid_xattr $B0/${V0}1/dir/f2
74096c
+TEST stat $B0/${V0}1/.glusterfs/${gfid_str_f2:0:2}/${gfid_str_f2:2:2}/$gfid_str_f2
74096c
+
74096c
+#------------------------------------------------------------------------------#
74096c
+cleanup;
74096c
diff --git a/tests/basic/afr/bug-1722507-type-mismatch-error-handling-non-granular.t b/tests/basic/afr/bug-1722507-type-mismatch-error-handling-non-granular.t
74096c
new file mode 100644
74096c
index 0000000..9079c93
74096c
--- /dev/null
74096c
+++ b/tests/basic/afr/bug-1722507-type-mismatch-error-handling-non-granular.t
74096c
@@ -0,0 +1,117 @@
74096c
+#!/bin/bash
74096c
+
74096c
+. $(dirname $0)/../../include.rc
74096c
+. $(dirname $0)/../../volume.rc
74096c
+. $(dirname $0)/../../afr.rc
74096c
+
74096c
+cleanup;
74096c
+
74096c
+## Start and create a volume
74096c
+TEST glusterd;
74096c
+TEST pidof glusterd;
74096c
+TEST $CLI volume info;
74096c
+
74096c
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2};
74096c
+TEST $CLI volume set $V0 cluster.granular-entry-heal off
74096c
+TEST $CLI volume start $V0;
74096c
+TEST $CLI volume set $V0 cluster.heal-timeout 5
74096c
+TEST $CLI volume heal $V0 disable
74096c
+EXPECT 'Started' volinfo_field $V0 'Status';
74096c
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
74096c
+
74096c
+TEST mkdir $M0/dir
74096c
+
74096c
+##########################################################################################
74096c
+# GFID link file and the GFID is missing on one brick and all the bricks are being blamed.
74096c
+
74096c
+TEST touch $M0/dir/file
74096c
+TEST `echo append>> $M0/dir/file`
74096c
+
74096c
+#B0 and B2 must blame B1
74096c
+setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000000000001 $B0/$V0"2"/dir
74096c
+setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000000000001 $B0/$V0"0"/dir
74096c
+setfattr -n trusted.afr.$V0-client-2 -v 0x000000000000000000000001 $B0/$V0"0"/dir
74096c
+
74096c
+# Add entry to xattrop dir to trigger index heal.
74096c
+xattrop_dir0=$(afr_get_index_path $B0/$V0"0")
74096c
+base_entry_b0=`ls $xattrop_dir0`
74096c
+gfid_str=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/$V0"0"/dir/))
74096c
+ln $xattrop_dir0/$base_entry_b0 $xattrop_dir0/$gfid_str
74096c
+EXPECT "^1$" get_pending_heal_count $V0
74096c
+
74096c
+# Remove the gfid xattr and the link file on one brick.
74096c
+gfid_file=$(gf_get_gfid_xattr $B0/$V0"0"/dir/file)
74096c
+gfid_str_file=$(gf_gfid_xattr_to_str $gfid_file)
74096c
+TEST setfattr -x trusted.gfid $B0/${V0}0/dir/file
74096c
+TEST rm -f $B0/${V0}0/.glusterfs/${gfid_str_file:0:2}/${gfid_str_file:2:2}/$gfid_str_file
74096c
+
74096c
+# Launch heal
74096c
+TEST $CLI volume heal $V0 enable
74096c
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^Y$" glustershd_up_status
74096c
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 0
74096c
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 1
74096c
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 2
74096c
+
74096c
+# Wait for 2 seconds to force posix to consider that this is a valid file but
74096c
+# without gfid.
74096c
+sleep 2
74096c
+TEST $CLI volume heal $V0
74096c
+
74096c
+# Heal should not fail as the file is missing gfid xattr and the link file,
74096c
+# which is not actually the gfid or type mismatch.
74096c
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
74096c
+
74096c
+EXPECT "$gfid_file" gf_get_gfid_xattr $B0/${V0}0/dir/file
74096c
+TEST stat $B0/${V0}0/.glusterfs/${gfid_str_file:0:2}/${gfid_str_file:2:2}/$gfid_str_file
74096c
+rm -f $M0/dir/file
74096c
+
74096c
+
74096c
+###########################################################################################
74096c
+# GFID link file and the GFID is missing on two bricks and all the bricks are being blamed.
74096c
+
74096c
+TEST $CLI volume heal $V0 disable
74096c
+TEST touch $M0/dir/file
74096c
+#TEST kill_brick $V0 $H0 $B0/$V0"1"
74096c
+
74096c
+#B0 and B2 must blame B1
74096c
+setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000000000001 $B0/$V0"2"/dir
74096c
+setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000000000001 $B0/$V0"0"/dir
74096c
+setfattr -n trusted.afr.$V0-client-2 -v 0x000000000000000000000001 $B0/$V0"0"/dir
74096c
+
74096c
+# Add entry to xattrop dir to trigger index heal.
74096c
+xattrop_dir0=$(afr_get_index_path $B0/$V0"0")
74096c
+base_entry_b0=`ls $xattrop_dir0`
74096c
+gfid_str=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/$V0"0"/dir/))
74096c
+ln $xattrop_dir0/$base_entry_b0 $xattrop_dir0/$gfid_str
74096c
+EXPECT "^1$" get_pending_heal_count $V0
74096c
+
74096c
+# Remove the gfid xattr and the link file on two bricks.
74096c
+gfid_file=$(gf_get_gfid_xattr $B0/$V0"0"/dir/file)
74096c
+gfid_str_file=$(gf_gfid_xattr_to_str $gfid_file)
74096c
+TEST setfattr -x trusted.gfid $B0/${V0}0/dir/file
74096c
+TEST rm -f $B0/${V0}0/.glusterfs/${gfid_str_file:0:2}/${gfid_str_file:2:2}/$gfid_str_file
74096c
+TEST setfattr -x trusted.gfid $B0/${V0}1/dir/file
74096c
+TEST rm -f $B0/${V0}1/.glusterfs/${gfid_str_file:0:2}/${gfid_str_file:2:2}/$gfid_str_file
74096c
+
74096c
+# Launch heal
74096c
+TEST $CLI volume heal $V0 enable
74096c
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^Y$" glustershd_up_status
74096c
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 0
74096c
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 1
74096c
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 2
74096c
+
74096c
+# Wait for 2 seconds to force posix to consider that this is a valid file but
74096c
+# without gfid.
74096c
+sleep 2
74096c
+TEST $CLI volume heal $V0
74096c
+
74096c
+# Heal should not fail as the file is missing gfid xattr and the link file,
74096c
+# which is not actually the gfid or type mismatch.
74096c
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
74096c
+
74096c
+EXPECT "$gfid_file" gf_get_gfid_xattr $B0/${V0}0/dir/file
74096c
+TEST stat $B0/${V0}0/.glusterfs/${gfid_str_file:0:2}/${gfid_str_file:2:2}/$gfid_str_file
74096c
+EXPECT "$gfid_file" gf_get_gfid_xattr $B0/${V0}1/dir/file
74096c
+TEST stat $B0/${V0}1/.glusterfs/${gfid_str_file:0:2}/${gfid_str_file:2:2}/$gfid_str_file
74096c
+
74096c
+cleanup
74096c
diff --git a/tests/basic/afr/bug-1749322-entry-heal-not-happening-non-granular.t b/tests/basic/afr/bug-1749322-entry-heal-not-happening-non-granular.t
74096c
new file mode 100644
74096c
index 0000000..4f27da4
74096c
--- /dev/null
74096c
+++ b/tests/basic/afr/bug-1749322-entry-heal-not-happening-non-granular.t
74096c
@@ -0,0 +1,90 @@
74096c
+#!/bin/bash
74096c
+
74096c
+. $(dirname $0)/../../include.rc
74096c
+. $(dirname $0)/../../volume.rc
74096c
+. $(dirname $0)/../../afr.rc
74096c
+
74096c
+cleanup
74096c
+
74096c
+function check_gfid_and_link_count
74096c
+{
74096c
+        local file=$1
74096c
+
74096c
+        file_gfid_b0=$(gf_get_gfid_xattr $B0/${V0}0/$file)
74096c
+        TEST [ ! -z $file_gfid_b0 ]
74096c
+        file_gfid_b1=$(gf_get_gfid_xattr $B0/${V0}1/$file)
74096c
+        file_gfid_b2=$(gf_get_gfid_xattr $B0/${V0}2/$file)
74096c
+        EXPECT $file_gfid_b0 echo $file_gfid_b1
74096c
+        EXPECT $file_gfid_b0 echo $file_gfid_b2
74096c
+
74096c
+        EXPECT "2" stat -c %h $B0/${V0}0/$file
74096c
+        EXPECT "2" stat -c %h $B0/${V0}1/$file
74096c
+        EXPECT "2" stat -c %h $B0/${V0}2/$file
74096c
+}
74096c
+TESTS_EXPECTED_IN_LOOP=18
74096c
+
74096c
+################################################################################
74096c
+## Start and create a volume
74096c
+TEST glusterd;
74096c
+TEST pidof glusterd;
74096c
+TEST $CLI volume info;
74096c
+
74096c
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2};
74096c
+TEST $CLI volume set $V0 cluster.granular-entry-heal off
74096c
+TEST $CLI volume start $V0;
74096c
+TEST $CLI volume set $V0 cluster.heal-timeout 5
74096c
+TEST $CLI volume heal $V0 disable
74096c
+EXPECT 'Started' volinfo_field $V0 'Status';
74096c
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
74096c
+
74096c
+TEST mkdir $M0/dir
74096c
+TEST `echo "File 1 " > $M0/dir/file1`
74096c
+TEST touch $M0/dir/file{2..4}
74096c
+
74096c
+# Remove file2 from 1st & 3rd bricks
74096c
+TEST rm -f $B0/$V0"0"/dir/file2
74096c
+TEST rm -f $B0/$V0"2"/dir/file2
74096c
+
74096c
+# Remove file3 and the .glusterfs hardlink from 1st & 2nd bricks
74096c
+gfid_file3=$(gf_get_gfid_xattr $B0/$V0"0"/dir/file3)
74096c
+gfid_str_file3=$(gf_gfid_xattr_to_str $gfid_file3)
74096c
+TEST rm $B0/$V0"0"/.glusterfs/${gfid_str_file3:0:2}/${gfid_str_file3:2:2}/$gfid_str_file3
74096c
+TEST rm $B0/$V0"1"/.glusterfs/${gfid_str_file3:0:2}/${gfid_str_file3:2:2}/$gfid_str_file3
74096c
+TEST rm -f $B0/$V0"0"/dir/file3
74096c
+TEST rm -f $B0/$V0"1"/dir/file3
74096c
+
74096c
+# Remove the .glusterfs hardlink and the gfid xattr of file4 on 3rd brick
74096c
+gfid_file4=$(gf_get_gfid_xattr $B0/$V0"0"/dir/file4)
74096c
+gfid_str_file4=$(gf_gfid_xattr_to_str $gfid_file4)
74096c
+TEST rm $B0/$V0"2"/.glusterfs/${gfid_str_file4:0:2}/${gfid_str_file4:2:2}/$gfid_str_file4
74096c
+TEST setfattr -x trusted.gfid $B0/$V0"2"/dir/file4
74096c
+
74096c
+# B0 and B2 blame each other
74096c
+setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000000000001 $B0/$V0"2"/dir
74096c
+setfattr -n trusted.afr.$V0-client-2 -v 0x000000000000000000000001 $B0/$V0"0"/dir
74096c
+
74096c
+# Add entry to xattrop dir on first brick.
74096c
+xattrop_dir0=$(afr_get_index_path $B0/$V0"0")
74096c
+base_entry_b0=`ls $xattrop_dir0`
74096c
+gfid_str=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/$V0"0"/dir/))
74096c
+TEST ln $xattrop_dir0/$base_entry_b0 $xattrop_dir0/$gfid_str
74096c
+
74096c
+EXPECT "^1$" get_pending_heal_count $V0
74096c
+
74096c
+# Launch heal
74096c
+TEST $CLI volume heal $V0 enable
74096c
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^Y$" glustershd_up_status
74096c
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 0
74096c
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 1
74096c
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 2
74096c
+TEST $CLI volume heal $V0
74096c
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
74096c
+
74096c
+# All the files must be present on all the bricks after conservative merge and
74096c
+# should have the gfid xattr and the .glusterfs hardlink.
74096c
+check_gfid_and_link_count dir/file1
74096c
+check_gfid_and_link_count dir/file2
74096c
+check_gfid_and_link_count dir/file3
74096c
+check_gfid_and_link_count dir/file4
74096c
+
74096c
+cleanup
74096c
diff --git a/tests/basic/afr/replace-brick-self-heal-non-granular.t b/tests/basic/afr/replace-brick-self-heal-non-granular.t
74096c
new file mode 100644
74096c
index 0000000..c86bff1
74096c
--- /dev/null
74096c
+++ b/tests/basic/afr/replace-brick-self-heal-non-granular.t
74096c
@@ -0,0 +1,65 @@
74096c
+#!/bin/bash
74096c
+. $(dirname $0)/../../include.rc
74096c
+. $(dirname $0)/../../volume.rc
74096c
+cleanup;
74096c
+
74096c
+TEST glusterd
74096c
+TEST pidof glusterd
74096c
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
74096c
+TEST $CLI volume set $V0 cluster.granular-entry-heal off
74096c
+TEST $CLI volume start $V0
74096c
+TEST $CLI volume set $V0 cluster.data-self-heal off
74096c
+TEST $CLI volume set $V0 cluster.metadata-self-heal off
74096c
+TEST $CLI volume set $V0 cluster.entry-self-heal off
74096c
+TEST $CLI volume set $V0 cluster.heal-timeout 5
74096c
+TEST $CLI volume set $V0 self-heal-daemon off
74096c
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0;
74096c
+
74096c
+# Create files
74096c
+for i in {1..5}
74096c
+do
74096c
+        echo $i > $M0/file$i.txt
74096c
+done
74096c
+
74096c
+# Metadata changes
74096c
+TEST setfattr -n user.test -v qwerty $M0/file5.txt
74096c
+
74096c
+# Replace brick1
74096c
+TEST $CLI volume replace-brick $V0 $H0:$B0/${V0}1 $H0:$B0/${V0}1_new commit force
74096c
+
74096c
+# Replaced-brick should accuse the non-replaced-brick (Simulating case for data-loss)
74096c
+TEST setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000000000001 $B0/${V0}1_new/
74096c
+
74096c
+# Check if pending xattr and dirty-xattr are set for replaced-brick
74096c
+EXPECT "000000000000000100000001" get_hex_xattr trusted.afr.$V0-client-1 $B0/${V0}0
74096c
+EXPECT "000000000000000000000001" get_hex_xattr trusted.afr.dirty $B0/${V0}1_new
74096c
+
74096c
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
74096c
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
74096c
+
74096c
+TEST $CLI volume set $V0 self-heal-daemon on
74096c
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
74096c
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
74096c
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
74096c
+TEST $CLI volume heal $V0
74096c
+
74096c
+# Wait for heal to complete
74096c
+EXPECT_WITHIN $HEAL_TIMEOUT "0" get_pending_heal_count $V0
74096c
+
74096c
+# Check if entry-heal has happened
74096c
+TEST diff <(ls $B0/${V0}0 | sort) <(ls $B0/${V0}1_new | sort)
74096c
+
74096c
+# To make sure that files were not lost from brick0
74096c
+TEST diff <(ls $B0/${V0}0 | sort) <(ls $B0/${V0}1 | sort)
74096c
+EXPECT "000000000000000000000000" get_hex_xattr trusted.afr.$V0-client-1 $B0/${V0}0
74096c
+
74096c
+# Test if data was healed
74096c
+TEST diff $B0/${V0}0/file1.txt $B0/${V0}1_new/file1.txt
74096c
+# To make sure that data was not lost from brick0
74096c
+TEST diff $B0/${V0}0/file1.txt $B0/${V0}1/file1.txt
74096c
+
74096c
+# Test if metadata was healed and exists on both the bricks
74096c
+EXPECT "qwerty" get_text_xattr user.test $B0/${V0}1_new/file5.txt
74096c
+EXPECT "qwerty" get_text_xattr user.test $B0/${V0}0/file5.txt
74096c
+
74096c
+cleanup;
74096c
diff --git a/tests/basic/afr/replace-brick-self-heal.t b/tests/basic/afr/replace-brick-self-heal.t
74096c
index 0360db7..da31c87 100644
74096c
--- a/tests/basic/afr/replace-brick-self-heal.t
74096c
+++ b/tests/basic/afr/replace-brick-self-heal.t
74096c
@@ -30,7 +30,7 @@ TEST $CLI volume replace-brick $V0 $H0:$B0/${V0}1 $H0:$B0/${V0}1_new commit forc
74096c
 TEST setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000000000001 $B0/${V0}1_new/
74096c
 
74096c
 # Check if pending xattr and dirty-xattr are set for replaced-brick
74096c
-EXPECT "000000000000000100000001" get_hex_xattr trusted.afr.$V0-client-1 $B0/${V0}0
74096c
+EXPECT "000000010000000100000001" get_hex_xattr trusted.afr.$V0-client-1 $B0/${V0}0
74096c
 EXPECT "000000000000000000000001" get_hex_xattr trusted.afr.dirty $B0/${V0}1_new
74096c
 
74096c
 EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
74096c
diff --git a/tests/bugs/replicate/bug-1130892.t b/tests/bugs/replicate/bug-1130892.t
74096c
index 0f57d66..e23eb26 100644
74096c
--- a/tests/bugs/replicate/bug-1130892.t
74096c
+++ b/tests/bugs/replicate/bug-1130892.t
74096c
@@ -56,7 +56,7 @@ EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1
74096c
 TEST stat $M0/one
74096c
 
74096c
 # Check pending xattrs
74096c
-EXPECT "00000000" afr_get_specific_changelog_xattr $B0/${V0}-0/one trusted.afr.$V0-client-1 data
74096c
+EXPECT "00000001" afr_get_specific_changelog_xattr $B0/${V0}-0/one trusted.afr.$V0-client-1 data
74096c
 EXPECT_NOT "00000000" afr_get_specific_changelog_xattr $B0/${V0}-0/one trusted.afr.$V0-client-1 entry
74096c
 EXPECT "00000000" afr_get_specific_changelog_xattr $B0/${V0}-0/one trusted.afr.$V0-client-1 metadata
74096c
 
74096c
diff --git a/tests/bugs/replicate/bug-1493415-gfid-heal.t b/tests/bugs/replicate/bug-1493415-gfid-heal.t
74096c
index 125c35a..9714d5e 100644
74096c
--- a/tests/bugs/replicate/bug-1493415-gfid-heal.t
74096c
+++ b/tests/bugs/replicate/bug-1493415-gfid-heal.t
74096c
@@ -49,7 +49,7 @@ TEST setfattr -x trusted.gfid $B0/${V0}1/dir/f2
74096c
 TEST rm $B0/${V0}1/.glusterfs/${gfid_str_f2:0:2}/${gfid_str_f2:2:2}/$gfid_str_f2
74096c
 
74096c
 #Now simulate setting of pending entry xattr on parent dir of 1st brick.
74096c
-TEST setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000000000001 $B0/${V0}0/dir
74096c
+TEST setfattr -n trusted.afr.$V0-client-1 -v 0x000000010000000000000001 $B0/${V0}0/dir
74096c
 create_brick_xattrop_entry $B0/${V0}0 dir
74096c
 
74096c
 #Trigger entry-heal via shd
74096c
diff --git a/tests/bugs/replicate/bug-1722507-type-mismatch-error-handling.t b/tests/bugs/replicate/bug-1722507-type-mismatch-error-handling.t
74096c
index 0aeaaaf..1fdf7ea 100644
74096c
--- a/tests/bugs/replicate/bug-1722507-type-mismatch-error-handling.t
74096c
+++ b/tests/bugs/replicate/bug-1722507-type-mismatch-error-handling.t
74096c
@@ -23,19 +23,21 @@ TEST mkdir $M0/dir
74096c
 ##########################################################################################
74096c
 # GFID link file and the GFID is missing on one brick and all the bricks are being blamed.
74096c
 
74096c
-TEST touch $M0/dir/file
74096c
-#TEST kill_brick $V0 $H0 $B0/$V0"1"
74096c
+TEST `echo append>> $M0/dir/file`
74096c
 
74096c
 #B0 and B2 must blame B1
74096c
-setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000000000001 $B0/$V0"2"/dir
74096c
-setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000000000001 $B0/$V0"0"/dir
74096c
-setfattr -n trusted.afr.$V0-client-2 -v 0x000000000000000000000001 $B0/$V0"0"/dir
74096c
+# Set data part of the xattr also to 1 so that local->need_full_crawl is true.
74096c
+# Another way is to create the needed entries inside indices/entry-changes
74096c
+# folder.
74096c
+setfattr -n trusted.afr.$V0-client-0 -v 0x000000010000000000000001 $B0/$V0"2"/dir
74096c
+setfattr -n trusted.afr.$V0-client-1 -v 0x000000010000000000000001 $B0/$V0"0"/dir
74096c
+setfattr -n trusted.afr.$V0-client-2 -v 0x000000010000000000000001 $B0/$V0"0"/dir
74096c
 
74096c
 # Add entry to xattrop dir to trigger index heal.
74096c
 xattrop_dir0=$(afr_get_index_path $B0/$V0"0")
74096c
 base_entry_b0=`ls $xattrop_dir0`
74096c
 gfid_str=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/$V0"0"/dir/))
74096c
-ln -s $xattrop_dir0/$base_entry_b0 $xattrop_dir0/$gfid_str
74096c
+ln $xattrop_dir0/$base_entry_b0 $xattrop_dir0/$gfid_str
74096c
 EXPECT "^1$" get_pending_heal_count $V0
74096c
 
74096c
 # Remove the gfid xattr and the link file on one brick.
74096c
@@ -70,18 +72,20 @@ rm -f $M0/dir/file
74096c
 
74096c
 TEST $CLI volume heal $V0 disable
74096c
 TEST touch $M0/dir/file
74096c
-#TEST kill_brick $V0 $H0 $B0/$V0"1"
74096c
 
74096c
 #B0 and B2 must blame B1
74096c
-setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000000000001 $B0/$V0"2"/dir
74096c
-setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000000000001 $B0/$V0"0"/dir
74096c
-setfattr -n trusted.afr.$V0-client-2 -v 0x000000000000000000000001 $B0/$V0"0"/dir
74096c
+# Set data part of the xattr also to 1 so that local->need_full_crawl is true.
74096c
+# Another way is to create the needed entries inside indices/entry-changes
74096c
+# folder.
74096c
+setfattr -n trusted.afr.$V0-client-0 -v 0x000000010000000000000001 $B0/$V0"2"/dir
74096c
+setfattr -n trusted.afr.$V0-client-1 -v 0x000000010000000000000001 $B0/$V0"0"/dir
74096c
+setfattr -n trusted.afr.$V0-client-2 -v 0x000000010000000000000001 $B0/$V0"0"/dir
74096c
 
74096c
 # Add entry to xattrop dir to trigger index heal.
74096c
 xattrop_dir0=$(afr_get_index_path $B0/$V0"0")
74096c
 base_entry_b0=`ls $xattrop_dir0`
74096c
 gfid_str=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/$V0"0"/dir/))
74096c
-ln -s $xattrop_dir0/$base_entry_b0 $xattrop_dir0/$gfid_str
74096c
+ln $xattrop_dir0/$base_entry_b0 $xattrop_dir0/$gfid_str
74096c
 EXPECT "^1$" get_pending_heal_count $V0
74096c
 
74096c
 # Remove the gfid xattr and the link file on two bricks.
74096c
diff --git a/tests/bugs/replicate/bug-1749322-entry-heal-not-happening.t b/tests/bugs/replicate/bug-1749322-entry-heal-not-happening.t
74096c
index 9627908..3da873a 100644
74096c
--- a/tests/bugs/replicate/bug-1749322-entry-heal-not-happening.t
74096c
+++ b/tests/bugs/replicate/bug-1749322-entry-heal-not-happening.t
74096c
@@ -59,8 +59,11 @@ TEST rm $B0/$V0"2"/.glusterfs/${gfid_str_file4:0:2}/${gfid_str_file4:2:2}/$gfid_
74096c
 TEST setfattr -x trusted.gfid $B0/$V0"2"/dir/file4
74096c
 
74096c
 # B0 and B2 blame each other
74096c
-setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000000000001 $B0/$V0"2"/dir
74096c
-setfattr -n trusted.afr.$V0-client-2 -v 0x000000000000000000000001 $B0/$V0"0"/dir
74096c
+# Set data part of the xattr also to 1 so that local->need_full_crawl is true.
74096c
+# Another way is to create the needed entries inside indices/entry-changes
74096c
+# folder.
74096c
+setfattr -n trusted.afr.$V0-client-0 -v 0x000000010000000000000001 $B0/$V0"2"/dir
74096c
+setfattr -n trusted.afr.$V0-client-2 -v 0x000000010000000000000001 $B0/$V0"0"/dir
74096c
 
74096c
 # Add entry to xattrop dir on first brick.
74096c
 xattrop_dir0=$(afr_get_index_path $B0/$V0"0")
74096c
diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c
74096c
index 1608f75..36fd3a9 100644
74096c
--- a/xlators/cluster/afr/src/afr-self-heal-common.c
74096c
+++ b/xlators/cluster/afr/src/afr-self-heal-common.c
74096c
@@ -2549,6 +2549,11 @@ afr_selfheal_do(call_frame_t *frame, xlator_t *this, uuid_t gfid)
74096c
         }
74096c
     }
74096c
 
74096c
+    gf_msg_debug(
74096c
+        this->name, 0,
74096c
+        "heals needed for %s: [entry-heal=%d, metadata-heal=%d, data-heal=%d]",
74096c
+        uuid_utoa(gfid), entry_selfheal, metadata_selfheal, data_selfheal);
74096c
+
74096c
     if (data_selfheal && priv->data_self_heal)
74096c
         data_ret = afr_selfheal_data(frame, this, fd);
74096c
 
74096c
diff --git a/xlators/cluster/afr/src/afr-self-heal-data.c b/xlators/cluster/afr/src/afr-self-heal-data.c
74096c
index cdff4a5..b97c66b 100644
74096c
--- a/xlators/cluster/afr/src/afr-self-heal-data.c
74096c
+++ b/xlators/cluster/afr/src/afr-self-heal-data.c
74096c
@@ -239,6 +239,9 @@ afr_selfheal_data_block(call_frame_t *frame, xlator_t *this, fd_t *fd,
74096c
     sink_count = AFR_COUNT(healed_sinks, priv->child_count);
74096c
     data_lock = alloca0(priv->child_count);
74096c
 
74096c
+    gf_msg_debug(this->name, 0, "gfid:%s, offset=%jd, size=%zu",
74096c
+                 uuid_utoa(fd->inode->gfid), offset, size);
74096c
+
74096c
     ret = afr_selfheal_inodelk(frame, this, fd->inode, this->name, offset, size,
74096c
                                data_lock);
74096c
     {
74096c
diff --git a/xlators/cluster/afr/src/afr-self-heal-entry.c b/xlators/cluster/afr/src/afr-self-heal-entry.c
74096c
index 40be898..00b5b2d 100644
74096c
--- a/xlators/cluster/afr/src/afr-self-heal-entry.c
74096c
+++ b/xlators/cluster/afr/src/afr-self-heal-entry.c
74096c
@@ -206,8 +206,11 @@ __afr_selfheal_heal_dirent(call_frame_t *frame, xlator_t *this, fd_t *fd,
74096c
                                             replies);
74096c
         } else {
74096c
             if (!gf_uuid_compare(replies[i].poststat.ia_gfid,
74096c
-                                 replies[source].poststat.ia_gfid))
74096c
+                                 replies[source].poststat.ia_gfid)) {
74096c
+                gf_msg_debug(this->name, 0, "skipping %s, no heal needed.",
74096c
+                             name);
74096c
                 continue;
74096c
+            }
74096c
 
74096c
             ret = afr_selfheal_recreate_entry(frame, i, source, sources,
74096c
                                               fd->inode, name, inode, replies);
74096c
@@ -839,7 +842,7 @@ afr_selfheal_entry_granular_dirent(xlator_t *subvol, gf_dirent_t *entry,
74096c
 
74096c
 out:
74096c
     loc_wipe(&loc);
74096c
-    return 0;
74096c
+    return ret;
74096c
 }
74096c
 
74096c
 static int
74096c
diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c
74096c
index a72c494..bd17a82 100644
74096c
--- a/xlators/mgmt/glusterd/src/glusterd-utils.c
74096c
+++ b/xlators/mgmt/glusterd/src/glusterd-utils.c
74096c
@@ -13181,6 +13181,19 @@ glusterd_enable_default_options(glusterd_volinfo_t *volinfo, char *option)
74096c
             goto out;
74096c
         }
74096c
     }
74096c
+
74096c
+    if ((conf->op_version >= GD_OP_VERSION_7_1) &&
74096c
+        (volinfo->status == GLUSTERD_STATUS_NONE)) {
74096c
+        ret = dict_set_dynstr_with_alloc(volinfo->dict,
74096c
+                                         "cluster.granular-entry-heal", "on");
74096c
+        if (ret) {
74096c
+            gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
74096c
+                   "Failed to set option 'cluster.granular-entry-heal' "
74096c
+                   "on volume %s",
74096c
+                   volinfo->volname);
74096c
+            goto out;
74096c
+        }
74096c
+    }
74096c
 out:
74096c
     return ret;
74096c
 }
74096c
-- 
74096c
1.8.3.1
74096c