b7d4d7
From aab8a587360214432c4a2ab59134411f1d38c509 Mon Sep 17 00:00:00 2001
b7d4d7
From: karthik-us <ksubrahm@redhat.com>
b7d4d7
Date: Wed, 9 Dec 2020 10:46:31 +0530
b7d4d7
Subject: [PATCH 515/517] cluster/afr: Heal directory rename without
b7d4d7
 rmdir/mkdir
b7d4d7
b7d4d7
Problem1:
b7d4d7
When a directory is renamed while a brick
b7d4d7
is down entry-heal always did an rm -rf on that directory on
b7d4d7
the sink on old location and did mkdir and created the directory
b7d4d7
hierarchy again in the new location. This is inefficient.
b7d4d7
b7d4d7
Problem2:
b7d4d7
Renamedir heal order may lead to a scenario where directory in
b7d4d7
the new location could be created before deleting it from old
b7d4d7
location leading to 2 directories with same gfid in posix.
b7d4d7
b7d4d7
Fix:
b7d4d7
As part of heal, if oldlocation is healed first and is not present in
b7d4d7
source-brick always rename it into a hidden directory inside the
b7d4d7
sink-brick so that when heal is triggered in new-location shd can
b7d4d7
rename it from this hidden directory to the new-location.
b7d4d7
b7d4d7
If new-location heal is triggered first and it detects that the
b7d4d7
directory already exists in the brick, then it should skip healing the
b7d4d7
directory until it appears in the hidden directory.
b7d4d7
b7d4d7
Credits: Ravi for rename-data-loss.t script
b7d4d7
b7d4d7
Upstream patch details:
b7d4d7
> Fixes: #1211
b7d4d7
> Change-Id: I0cba2006f35cd03d314d18211ce0bd530e254843
b7d4d7
> Signed-off-by: Pranith Kumar K <pkarampu@redhat.com>
b7d4d7
Upstream patch: https://review.gluster.org/#/c/glusterfs/+/24373/
b7d4d7
b7d4d7
BUG: 1640148
b7d4d7
Change-Id: I0cba2006f35cd03d314d18211ce0bd530e254843
b7d4d7
Signed-off-by: karthik-us <ksubrahm@redhat.com>
b7d4d7
Reviewed-on: https://code.engineering.redhat.com/gerrit/220660
b7d4d7
Tested-by: RHGS Build Bot <nigelb@redhat.com>
b7d4d7
Reviewed-by: Ravishankar Narayanankutty <ravishankar@redhat.com>
b7d4d7
---
b7d4d7
 tests/afr.rc                                    |  16 +
b7d4d7
 tests/basic/afr/afr-anon-inode-no-quorum.t      |  63 ++++
b7d4d7
 tests/basic/afr/afr-anon-inode.t                | 114 ++++++
b7d4d7
 tests/basic/afr/entry-self-heal-anon-dir-off.t  | 464 ++++++++++++++++++++++++
b7d4d7
 tests/basic/afr/rename-data-loss.t              |  72 ++++
b7d4d7
 tests/bugs/replicate/bug-1744548-heal-timeout.t |   6 +-
b7d4d7
 tests/features/trash.t                          |  74 ++--
b7d4d7
 xlators/cluster/afr/src/afr-common.c            |  46 ++-
b7d4d7
 xlators/cluster/afr/src/afr-dir-read.c          |  12 +-
b7d4d7
 xlators/cluster/afr/src/afr-self-heal-common.c  | 182 ++++++++++
b7d4d7
 xlators/cluster/afr/src/afr-self-heal-entry.c   | 206 +++++++++--
b7d4d7
 xlators/cluster/afr/src/afr-self-heal-name.c    |  33 +-
b7d4d7
 xlators/cluster/afr/src/afr-self-heal.h         |   5 +
b7d4d7
 xlators/cluster/afr/src/afr-self-heald.c        | 178 ++++++++-
b7d4d7
 xlators/cluster/afr/src/afr-self-heald.h        |   2 +-
b7d4d7
 xlators/cluster/afr/src/afr.c                   |  40 +-
b7d4d7
 xlators/cluster/afr/src/afr.h                   |  11 +
b7d4d7
 xlators/mgmt/glusterd/src/glusterd-volgen.c     |  39 ++
b7d4d7
 xlators/mgmt/glusterd/src/glusterd-volume-set.c |   6 +
b7d4d7
 19 files changed, 1442 insertions(+), 127 deletions(-)
b7d4d7
 create mode 100644 tests/basic/afr/afr-anon-inode-no-quorum.t
b7d4d7
 create mode 100644 tests/basic/afr/afr-anon-inode.t
b7d4d7
 create mode 100644 tests/basic/afr/entry-self-heal-anon-dir-off.t
b7d4d7
 create mode 100644 tests/basic/afr/rename-data-loss.t
b7d4d7
b7d4d7
diff --git a/tests/afr.rc b/tests/afr.rc
b7d4d7
index 35f352d..2417899 100644
b7d4d7
--- a/tests/afr.rc
b7d4d7
+++ b/tests/afr.rc
b7d4d7
@@ -105,3 +105,19 @@ function get_quorum_type()
b7d4d7
         local repl_id="$3"
b7d4d7
         cat $m/.meta/graphs/active/$v-replicate-$repl_id/private|grep quorum-type|awk '{print $3}'
b7d4d7
 }
b7d4d7
+
b7d4d7
+function afr_private_key_value()
b7d4d7
+{
b7d4d7
+        local v=$1
b7d4d7
+        local m=$2
b7d4d7
+        local replica_id=$3
b7d4d7
+        local key=$4
b7d4d7
+#xargs at the end will strip leading spaces
b7d4d7
+        grep -E "^${key} = " $m/.meta/graphs/active/${v}-replicate-${replica_id}/private | cut -f2 -d'=' | xargs
b7d4d7
+}
b7d4d7
+
b7d4d7
+function afr_anon_entry_count()
b7d4d7
+{
b7d4d7
+    local b=$1
b7d4d7
+    ls $b/.glusterfs-anonymous-inode* | wc -l
b7d4d7
+}
b7d4d7
diff --git a/tests/basic/afr/afr-anon-inode-no-quorum.t b/tests/basic/afr/afr-anon-inode-no-quorum.t
b7d4d7
new file mode 100644
b7d4d7
index 0000000..896ba0c
b7d4d7
--- /dev/null
b7d4d7
+++ b/tests/basic/afr/afr-anon-inode-no-quorum.t
b7d4d7
@@ -0,0 +1,63 @@
b7d4d7
+#!/bin/bash
b7d4d7
+
b7d4d7
+#Test that anon-inode entry is not cleaned up as long as there exists at least
b7d4d7
+#one valid entry
b7d4d7
+. $(dirname $0)/../../include.rc
b7d4d7
+. $(dirname $0)/../../volume.rc
b7d4d7
+. $(dirname $0)/../../afr.rc
b7d4d7
+
b7d4d7
+cleanup;
b7d4d7
+
b7d4d7
+TEST glusterd
b7d4d7
+TEST pidof glusterd
b7d4d7
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
b7d4d7
+TEST $CLI volume heal $V0 disable
b7d4d7
+TEST $CLI volume set $V0 performance.write-behind off
b7d4d7
+TEST $CLI volume set $V0 performance.read-ahead off
b7d4d7
+TEST $CLI volume set $V0 performance.readdir-ahead off
b7d4d7
+TEST $CLI volume set $V0 performance.open-behind off
b7d4d7
+TEST $CLI volume set $V0 performance.stat-prefetch off
b7d4d7
+TEST $CLI volume set $V0 performance.io-cache off
b7d4d7
+TEST $CLI volume set $V0 performance.quick-read off
b7d4d7
+TEST $CLI volume set $V0 cluster.entry-self-heal off
b7d4d7
+TEST $CLI volume start $V0
b7d4d7
+
b7d4d7
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
b7d4d7
+
b7d4d7
+TEST touch $M0/a $M0/b
b7d4d7
+
b7d4d7
+gfid_a=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/${V0}0/a))
b7d4d7
+gfid_b=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/${V0}0/b))
b7d4d7
+TEST kill_brick $V0 $H0 $B0/${V0}0
b7d4d7
+TEST mv $M0/a $M0/a-new
b7d4d7
+TEST mv $M0/b $M0/b-new
b7d4d7
+
b7d4d7
+TEST $CLI volume start $V0 force
b7d4d7
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
b7d4d7
+TEST ! ls $M0/a
b7d4d7
+TEST ! ls $M0/b
b7d4d7
+anon_inode_name=$(ls -a $B0/${V0}0 | grep glusterfs-anonymous-inode)
b7d4d7
+TEST stat $B0/${V0}0/$anon_inode_name/$gfid_a
b7d4d7
+TEST stat $B0/${V0}0/$anon_inode_name/$gfid_b
b7d4d7
+#Make sure index heal doesn't happen after enabling heal
b7d4d7
+TEST setfattr -x trusted.afr.$V0-client-0 $B0/${V0}1
b7d4d7
+TEST rm -f $B0/${V0}1/.glusterfs/indices/xattrop/*
b7d4d7
+TEST $CLI volume heal $V0 enable
b7d4d7
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
b7d4d7
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
b7d4d7
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
b7d4d7
+TEST $CLI volume heal $V0
b7d4d7
+#Allow time for a scan
b7d4d7
+sleep 5
b7d4d7
+TEST stat $B0/${V0}0/$anon_inode_name/$gfid_a
b7d4d7
+TEST stat $B0/${V0}0/$anon_inode_name/$gfid_b
b7d4d7
+inum_b=$(STAT_INO $B0/${V0}0/$anon_inode_name/$gfid_b)
b7d4d7
+TEST rm -f $M0/a-new
b7d4d7
+TEST stat $M0/b-new
b7d4d7
+
b7d4d7
+TEST $CLI volume heal $V0
b7d4d7
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" afr_anon_entry_count $B0/${V0}0
b7d4d7
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" afr_anon_entry_count $B0/${V0}1
b7d4d7
+EXPECT "$inum_b" STAT_INO $B0/${V0}0/b-new
b7d4d7
+
b7d4d7
+cleanup
b7d4d7
diff --git a/tests/basic/afr/afr-anon-inode.t b/tests/basic/afr/afr-anon-inode.t
b7d4d7
new file mode 100644
b7d4d7
index 0000000..f4cf37a
b7d4d7
--- /dev/null
b7d4d7
+++ b/tests/basic/afr/afr-anon-inode.t
b7d4d7
@@ -0,0 +1,114 @@
b7d4d7
+#!/bin/bash
b7d4d7
+#Tests that afr-anon-inode test cases work fine as expected
b7d4d7
+#These are cases where in entry-heal/name-heal we dont know entry for an inode
b7d4d7
+#so these inodes are kept in a special directory
b7d4d7
+
b7d4d7
+. $(dirname $0)/../../include.rc
b7d4d7
+. $(dirname $0)/../../volume.rc
b7d4d7
+. $(dirname $0)/../../afr.rc
b7d4d7
+
b7d4d7
+cleanup;
b7d4d7
+
b7d4d7
+TEST glusterd
b7d4d7
+TEST pidof glusterd
b7d4d7
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0..2}
b7d4d7
+TEST $CLI volume set $V0 performance.quick-read off
b7d4d7
+TEST $CLI volume set $V0 performance.io-cache off
b7d4d7
+TEST $CLI volume set $V0 performance.write-behind off
b7d4d7
+TEST $CLI volume set $V0 performance.stat-prefetch off
b7d4d7
+TEST $CLI volume set $V0 performance.read-ahead off
b7d4d7
+TEST $CLI volume set $V0 performance.open-behind off
b7d4d7
+TEST $CLI volume start $V0
b7d4d7
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0;
b7d4d7
+EXPECT "^1$" afr_private_key_value $V0 $M0 0 "use-anonymous-inode"
b7d4d7
+TEST $CLI volume set $V0 cluster.use-anonymous-inode no
b7d4d7
+EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "^0$" afr_private_key_value $V0 $M0 0 "use-anonymous-inode"
b7d4d7
+TEST $CLI volume set $V0 cluster.use-anonymous-inode yes
b7d4d7
+EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "^1$" afr_private_key_value $V0 $M0 0 "use-anonymous-inode"
b7d4d7
+TEST mkdir -p $M0/d1/b $M0/d2/a
b7d4d7
+TEST kill_brick $V0 $H0 $B0/${V0}0
b7d4d7
+TEST mv $M0/d2/a $M0/d1
b7d4d7
+TEST mv $M0/d1/b $M0/d2
b7d4d7
+TEST $CLI volume start $V0 force
b7d4d7
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
b7d4d7
+anon_inode_name=$(ls -a $B0/${V0}0 | grep glusterfs-anonymous-inode)
b7d4d7
+TEST [[ -d $B0/${V0}1/$anon_inode_name ]]
b7d4d7
+TEST [[ -d $B0/${V0}2/$anon_inode_name ]]
b7d4d7
+anon_gfid=$(gf_get_gfid_xattr $B0/${V0}0/$anon_inode_name)
b7d4d7
+EXPECT "$anon_gfid" gf_get_gfid_xattr $B0/${V0}1/$anon_inode_name
b7d4d7
+EXPECT "$anon_gfid" gf_get_gfid_xattr $B0/${V0}2/$anon_inode_name
b7d4d7
+
b7d4d7
+TEST ! ls $M0/$anon_inode_name
b7d4d7
+EXPECT "^4$" echo $(ls -a $M0 | wc -l)
b7d4d7
+
b7d4d7
+#Test purging code path by shd
b7d4d7
+TEST $CLI volume heal $V0 disable
b7d4d7
+TEST mkdir $M0/l0 $M0/l1 $M0/l2
b7d4d7
+TEST touch $M0/del-file $M0/del-file-nolink $M0/l0/file
b7d4d7
+TEST ln $M0/del-file $M0/del-file-link
b7d4d7
+TEST ln $M0/l0/file $M0/l1/file-link1
b7d4d7
+TEST ln $M0/l0/file $M0/l2/file-link2
b7d4d7
+TEST mkdir -p $M0/del-recursive-dir/d1
b7d4d7
+
b7d4d7
+TEST kill_brick $V0 $H0 $B0/${V0}0
b7d4d7
+TEST rm -f $M0/del-file $M0/del-file-nolink
b7d4d7
+TEST rm -rf $M0/del-recursive-dir
b7d4d7
+TEST mv $M0/d1/a $M0/d2
b7d4d7
+TEST mv $M0/l0/file $M0/l0/renamed-file
b7d4d7
+TEST $CLI volume start $V0 force
b7d4d7
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status $V0 0
b7d4d7
+
b7d4d7
+nolink_gfid=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/${V0}0/del-file-nolink))
b7d4d7
+link_gfid=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/${V0}0/del-file))
b7d4d7
+dir_gfid=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/${V0}0/del-recursive-dir))
b7d4d7
+rename_dir_gfid=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/${V0}0/d1/a))
b7d4d7
+rename_file_gfid=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/${V0}0/l0/file))
b7d4d7
+TEST ! stat $M0/del-file
b7d4d7
+TEST stat $B0/${V0}0/$anon_inode_name/$link_gfid
b7d4d7
+TEST ! stat $M0/del-file-nolink
b7d4d7
+TEST ! stat $B0/${V0}0/$anon_inode_name/$nolink_gfid
b7d4d7
+TEST ! stat $M0/del-recursive-dir
b7d4d7
+TEST stat $B0/${V0}0/$anon_inode_name/$dir_gfid
b7d4d7
+TEST ! stat $M0/d1/a
b7d4d7
+TEST stat $B0/${V0}0/$anon_inode_name/$rename_dir_gfid
b7d4d7
+TEST ! stat $M0/l0/file
b7d4d7
+TEST stat $B0/${V0}0/$anon_inode_name/$rename_file_gfid
b7d4d7
+
b7d4d7
+TEST kill_brick $V0 $H0 $B0/${V0}1
b7d4d7
+TEST mv $M0/l1/file-link1 $M0/l1/renamed-file-link1
b7d4d7
+TEST $CLI volume start $V0 force
b7d4d7
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status $V0 1
b7d4d7
+TEST ! stat $M0/l1/file-link1
b7d4d7
+TEST stat $B0/${V0}1/$anon_inode_name/$rename_file_gfid
b7d4d7
+
b7d4d7
+TEST kill_brick $V0 $H0 $B0/${V0}2
b7d4d7
+TEST mv $M0/l2/file-link2 $M0/l2/renamed-file-link2
b7d4d7
+TEST $CLI volume start $V0 force
b7d4d7
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status $V0 2
b7d4d7
+TEST ! stat $M0/l2/file-link2
b7d4d7
+TEST stat $B0/${V0}2/$anon_inode_name/$rename_file_gfid
b7d4d7
+
b7d4d7
+#Simulate only anon-inodes present in all bricks
b7d4d7
+TEST rm -f $M0/l0/renamed-file $M0/l1/renamed-file-link1 $M0/l2/renamed-file-link2
b7d4d7
+
b7d4d7
+#Test that shd doesn't cleanup anon-inodes when some bricks are down
b7d4d7
+TEST kill_brick $V0 $H0 $B0/${V0}1
b7d4d7
+TEST $CLI volume heal $V0 enable
b7d4d7
+$CLI volume heal $V0
b7d4d7
+sleep 5 #Allow time for completion of one scan
b7d4d7
+TEST stat $B0/${V0}0/$anon_inode_name/$link_gfid
b7d4d7
+TEST stat $B0/${V0}0/$anon_inode_name/$rename_dir_gfid
b7d4d7
+TEST stat $B0/${V0}0/$anon_inode_name/$dir_gfid
b7d4d7
+rename_dir_inum=$(STAT_INO $B0/${V0}0/$anon_inode_name/$rename_dir_gfid)
b7d4d7
+
b7d4d7
+TEST $CLI volume start $V0 force
b7d4d7
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status $V0 1
b7d4d7
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
b7d4d7
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" afr_anon_entry_count $B0/${V0}0
b7d4d7
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" afr_anon_entry_count $B0/${V0}1
b7d4d7
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" afr_anon_entry_count $B0/${V0}2
b7d4d7
+
b7d4d7
+#Test that rename indeed happened instead of rmdir/mkdir
b7d4d7
+renamed_dir_inum=$(STAT_INO $B0/${V0}0/d2/a)
b7d4d7
+EXPECT "$rename_dir_inum" echo $renamed_dir_inum
b7d4d7
+cleanup;
b7d4d7
diff --git a/tests/basic/afr/entry-self-heal-anon-dir-off.t b/tests/basic/afr/entry-self-heal-anon-dir-off.t
b7d4d7
new file mode 100644
b7d4d7
index 0000000..0803a08
b7d4d7
--- /dev/null
b7d4d7
+++ b/tests/basic/afr/entry-self-heal-anon-dir-off.t
b7d4d7
@@ -0,0 +1,464 @@
b7d4d7
+#!/bin/bash
b7d4d7
+
b7d4d7
+#This file checks if missing entry self-heal and entry self-heal are working
b7d4d7
+#as expected.
b7d4d7
+. $(dirname $0)/../../include.rc
b7d4d7
+. $(dirname $0)/../../volume.rc
b7d4d7
+. $(dirname $0)/../../afr.rc
b7d4d7
+
b7d4d7
+cleanup;
b7d4d7
+
b7d4d7
+function get_file_type {
b7d4d7
+        stat -c "%a:%F:%g:%t:%T:%u" $1
b7d4d7
+}
b7d4d7
+
b7d4d7
+function diff_dirs {
b7d4d7
+        diff <(ls $1 | sort) <(ls $2 | sort)
b7d4d7
+}
b7d4d7
+
b7d4d7
+function heal_status {
b7d4d7
+        local f1_path="${1}/${3}"
b7d4d7
+        local f2_path="${2}/${3}"
b7d4d7
+        local insync=""
b7d4d7
+        diff_dirs $f1_path $f2_path
b7d4d7
+        if [ $? -eq 0 ];
b7d4d7
+        then
b7d4d7
+                insync="Y"
b7d4d7
+        else
b7d4d7
+                insync="N"
b7d4d7
+        fi
b7d4d7
+        local xattr11=$(get_hex_xattr trusted.afr.$V0-client-0 $f1_path)
b7d4d7
+        local xattr12=$(get_hex_xattr trusted.afr.$V0-client-1 $f1_path)
b7d4d7
+        local xattr21=$(get_hex_xattr trusted.afr.$V0-client-0 $f2_path)
b7d4d7
+        local xattr22=$(get_hex_xattr trusted.afr.$V0-client-1 $f2_path)
b7d4d7
+        local dirty1=$(get_hex_xattr trusted.afr.dirty $f1_path)
b7d4d7
+        local dirty2=$(get_hex_xattr trusted.afr.dirty $f2_path)
b7d4d7
+        if [ -z $xattr11 ]; then xattr11="000000000000000000000000"; fi
b7d4d7
+        if [ -z $xattr12 ]; then xattr12="000000000000000000000000"; fi
b7d4d7
+        if [ -z $xattr21 ]; then xattr21="000000000000000000000000"; fi
b7d4d7
+        if [ -z $xattr22 ]; then xattr22="000000000000000000000000"; fi
b7d4d7
+        if [ -z $dirty1 ]; then dirty1="000000000000000000000000"; fi
b7d4d7
+        if [ -z $dirty2 ]; then dirty2="000000000000000000000000"; fi
b7d4d7
+        echo ${insync}${xattr11}${xattr12}${xattr21}${xattr22}${dirty1}${dirty2}
b7d4d7
+}
b7d4d7
+
b7d4d7
+function is_heal_done {
b7d4d7
+        local zero_xattr="000000000000000000000000"
b7d4d7
+        if [ "$(heal_status $@)" == "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" ];
b7d4d7
+        then
b7d4d7
+                echo "Y"
b7d4d7
+        else
b7d4d7
+                echo "N"
b7d4d7
+        fi
b7d4d7
+}
b7d4d7
+
b7d4d7
+function print_pending_heals {
b7d4d7
+        local result=":"
b7d4d7
+        for i in "$@";
b7d4d7
+        do
b7d4d7
+                if [ "N" == $(is_heal_done $B0/${V0}0 $B0/${V0}1 $i) ];
b7d4d7
+                then
b7d4d7
+                        result="$result:$i"
b7d4d7
+                fi
b7d4d7
+        done
b7d4d7
+#To prevent any match for EXPECT_WITHIN, print a char non-existent in file-names
b7d4d7
+        if [ $result == ":" ]; then result="~"; fi
b7d4d7
+        echo $result
b7d4d7
+}
b7d4d7
+
b7d4d7
+zero_xattr="000000000000000000000000"
b7d4d7
+TEST glusterd
b7d4d7
+TEST pidof glusterd
b7d4d7
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
b7d4d7
+TEST $CLI volume heal $V0 disable
b7d4d7
+TEST $CLI volume set $V0 cluster.use-anonymous-inode off
b7d4d7
+TEST $CLI volume set $V0 performance.write-behind off
b7d4d7
+TEST $CLI volume set $V0 performance.read-ahead off
b7d4d7
+TEST $CLI volume set $V0 performance.readdir-ahead off
b7d4d7
+TEST $CLI volume set $V0 performance.open-behind off
b7d4d7
+TEST $CLI volume set $V0 performance.stat-prefetch off
b7d4d7
+TEST $CLI volume set $V0 performance.io-cache off
b7d4d7
+TEST $CLI volume set $V0 performance.quick-read off
b7d4d7
+TEST $CLI volume set $V0 cluster.data-self-heal on
b7d4d7
+TEST $CLI volume set $V0 cluster.metadata-self-heal on
b7d4d7
+TEST $CLI volume set $V0 cluster.entry-self-heal on
b7d4d7
+TEST $CLI volume start $V0
b7d4d7
+
b7d4d7
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 --use-readdirp=no $M0
b7d4d7
+cd $M0
b7d4d7
+#_me_ is dir on which missing entry self-heal happens, _heal is where dir self-heal happens
b7d4d7
+#spb is split-brain, fool is all fool
b7d4d7
+
b7d4d7
+#source_self_accusing means there exists source and a sink which self-accuses.
b7d4d7
+#This simulates failures where fops failed on the bricks without it going down.
b7d4d7
+#Something like EACCESS/EDQUOT etc
b7d4d7
+
b7d4d7
+TEST mkdir spb_heal spb spb_me_heal spb_me fool_heal fool_me v1_fool_heal v1_fool_me source_creations_heal source_deletions_heal source_creations_me source_deletions_me v1_dirty_me v1_dirty_heal source_self_accusing
b7d4d7
+TEST mkfifo source_deletions_heal/fifo
b7d4d7
+TEST mknod  source_deletions_heal/block b 4 5
b7d4d7
+TEST mknod  source_deletions_heal/char c 1 5
b7d4d7
+TEST touch  source_deletions_heal/file
b7d4d7
+TEST ln -s  source_deletions_heal/file source_deletions_heal/slink
b7d4d7
+TEST mkdir  source_deletions_heal/dir1
b7d4d7
+TEST mkdir  source_deletions_heal/dir1/dir2
b7d4d7
+
b7d4d7
+TEST mkfifo source_deletions_me/fifo
b7d4d7
+TEST mknod  source_deletions_me/block b 4 5
b7d4d7
+TEST mknod  source_deletions_me/char c 1 5
b7d4d7
+TEST touch  source_deletions_me/file
b7d4d7
+TEST ln -s  source_deletions_me/file source_deletions_me/slink
b7d4d7
+TEST mkdir  source_deletions_me/dir1
b7d4d7
+TEST mkdir  source_deletions_me/dir1/dir2
b7d4d7
+
b7d4d7
+TEST mkfifo source_self_accusing/fifo
b7d4d7
+TEST mknod  source_self_accusing/block b 4 5
b7d4d7
+TEST mknod  source_self_accusing/char c 1 5
b7d4d7
+TEST touch  source_self_accusing/file
b7d4d7
+TEST ln -s  source_self_accusing/file source_self_accusing/slink
b7d4d7
+TEST mkdir  source_self_accusing/dir1
b7d4d7
+TEST mkdir  source_self_accusing/dir1/dir2
b7d4d7
+
b7d4d7
+TEST kill_brick $V0 $H0 $B0/${V0}0
b7d4d7
+
b7d4d7
+TEST touch spb_heal/0 spb/0 spb_me_heal/0 spb_me/0 fool_heal/0 fool_me/0 v1_fool_heal/0 v1_fool_me/0 v1_dirty_heal/0 v1_dirty_me/0
b7d4d7
+TEST rm -rf source_deletions_heal/fifo source_deletions_heal/block source_deletions_heal/char source_deletions_heal/file source_deletions_heal/slink source_deletions_heal/dir1
b7d4d7
+TEST rm -rf source_deletions_me/fifo source_deletions_me/block source_deletions_me/char source_deletions_me/file source_deletions_me/slink source_deletions_me/dir1
b7d4d7
+TEST rm -rf source_self_accusing/fifo source_self_accusing/block source_self_accusing/char source_self_accusing/file source_self_accusing/slink source_self_accusing/dir1
b7d4d7
+
b7d4d7
+#Test that the files are deleted
b7d4d7
+TEST ! stat $B0/${V0}1/source_deletions_heal/fifo
b7d4d7
+TEST ! stat $B0/${V0}1/source_deletions_heal/block
b7d4d7
+TEST ! stat $B0/${V0}1/source_deletions_heal/char
b7d4d7
+TEST ! stat $B0/${V0}1/source_deletions_heal/file
b7d4d7
+TEST ! stat $B0/${V0}1/source_deletions_heal/slink
b7d4d7
+TEST ! stat $B0/${V0}1/source_deletions_heal/dir1
b7d4d7
+TEST ! stat $B0/${V0}1/source_deletions_me/fifo
b7d4d7
+TEST ! stat $B0/${V0}1/source_deletions_me/block
b7d4d7
+TEST ! stat $B0/${V0}1/source_deletions_me/char
b7d4d7
+TEST ! stat $B0/${V0}1/source_deletions_me/file
b7d4d7
+TEST ! stat $B0/${V0}1/source_deletions_me/slink
b7d4d7
+TEST ! stat $B0/${V0}1/source_deletions_me/dir1
b7d4d7
+TEST ! stat $B0/${V0}1/source_self_accusing/fifo
b7d4d7
+TEST ! stat $B0/${V0}1/source_self_accusing/block
b7d4d7
+TEST ! stat $B0/${V0}1/source_self_accusing/char
b7d4d7
+TEST ! stat $B0/${V0}1/source_self_accusing/file
b7d4d7
+TEST ! stat $B0/${V0}1/source_self_accusing/slink
b7d4d7
+TEST ! stat $B0/${V0}1/source_self_accusing/dir1
b7d4d7
+
b7d4d7
+
b7d4d7
+TEST mkfifo source_creations_heal/fifo
b7d4d7
+TEST mknod  source_creations_heal/block b 4 5
b7d4d7
+TEST mknod  source_creations_heal/char c 1 5
b7d4d7
+TEST touch  source_creations_heal/file
b7d4d7
+TEST ln -s  source_creations_heal/file source_creations_heal/slink
b7d4d7
+TEST mkdir  source_creations_heal/dir1
b7d4d7
+TEST mkdir  source_creations_heal/dir1/dir2
b7d4d7
+
b7d4d7
+TEST mkfifo source_creations_me/fifo
b7d4d7
+TEST mknod  source_creations_me/block b 4 5
b7d4d7
+TEST mknod  source_creations_me/char c 1 5
b7d4d7
+TEST touch  source_creations_me/file
b7d4d7
+TEST ln -s  source_creations_me/file source_creations_me/slink
b7d4d7
+TEST mkdir  source_creations_me/dir1
b7d4d7
+TEST mkdir  source_creations_me/dir1/dir2
b7d4d7
+
b7d4d7
+$CLI volume stop $V0
b7d4d7
+
b7d4d7
+#simulate fool fool scenario for fool_* dirs
b7d4d7
+setfattr -x trusted.afr.$V0-client-0 $B0/${V0}1/{fool_heal,fool_me}
b7d4d7
+setfattr -n trusted.afr.dirty -v 0x000000000000000000000001 $B0/${V0}1/{fool_heal,fool_me}
b7d4d7
+setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000000000001 $B0/${V0}1/{v1_fool_heal,v1_fool_me}
b7d4d7
+
b7d4d7
+#Simulate v1-dirty(self-accusing but no pending ops on others) scenario for v1-dirty
b7d4d7
+setfattr -x trusted.afr.$V0-client-0 $B0/${V0}1/v1_dirty_{heal,me}
b7d4d7
+setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000000000001 $B0/${V0}1/v1_dirty_{heal,me}
b7d4d7
+
b7d4d7
+$CLI volume start $V0 force
b7d4d7
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
b7d4d7
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
b7d4d7
+TEST kill_brick $V0 $H0 $B0/${V0}1
b7d4d7
+
b7d4d7
+TEST touch spb_heal/1 spb/0 spb_me_heal/1 spb_me/0 fool_heal/1 fool_me/1 v1_fool_heal/1 v1_fool_me/1
b7d4d7
+
b7d4d7
+$CLI volume stop $V0
b7d4d7
+
b7d4d7
+#simulate fool fool scenario for fool_* dirs
b7d4d7
+setfattr -x trusted.afr.$V0-client-1 $B0/${V0}0/{fool_heal,fool_me}
b7d4d7
+setfattr -n trusted.afr.dirty -v 0x000000000000000000000001 $B0/${V0}1/{fool_heal,fool_me}
b7d4d7
+setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000000000001 $B0/${V0}1/{v1_fool_heal,v1_fool_me}
b7d4d7
+
b7d4d7
+#simulate self-accusing for source_self_accusing
b7d4d7
+TEST setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000000000006 $B0/${V0}0/source_self_accusing
b7d4d7
+
b7d4d7
+$CLI volume start $V0 force
b7d4d7
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
b7d4d7
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
b7d4d7
+
b7d4d7
+# Check if conservative merges happened correctly on _me_ dirs
b7d4d7
+TEST stat spb_me_heal/1
b7d4d7
+TEST stat $B0/${V0}0/spb_me_heal/1
b7d4d7
+TEST stat $B0/${V0}1/spb_me_heal/1
b7d4d7
+
b7d4d7
+TEST stat spb_me_heal/0
b7d4d7
+TEST stat $B0/${V0}0/spb_me_heal/0
b7d4d7
+TEST stat $B0/${V0}1/spb_me_heal/0
b7d4d7
+
b7d4d7
+TEST stat fool_me/1
b7d4d7
+TEST stat $B0/${V0}0/fool_me/1
b7d4d7
+TEST stat $B0/${V0}1/fool_me/1
b7d4d7
+
b7d4d7
+TEST stat fool_me/0
b7d4d7
+TEST stat $B0/${V0}0/fool_me/0
b7d4d7
+TEST stat $B0/${V0}1/fool_me/0
b7d4d7
+
b7d4d7
+TEST stat v1_fool_me/0
b7d4d7
+TEST stat $B0/${V0}0/v1_fool_me/0
b7d4d7
+TEST stat $B0/${V0}1/v1_fool_me/0
b7d4d7
+
b7d4d7
+TEST stat v1_fool_me/1
b7d4d7
+TEST stat $B0/${V0}0/v1_fool_me/1
b7d4d7
+TEST stat $B0/${V0}1/v1_fool_me/1
b7d4d7
+
b7d4d7
+TEST stat v1_dirty_me/0
b7d4d7
+TEST stat $B0/${V0}0/v1_dirty_me/0
b7d4d7
+TEST stat $B0/${V0}1/v1_dirty_me/0
b7d4d7
+
b7d4d7
+#Check if files that have gfid-mismatches in _me_ are giving EIO
b7d4d7
+TEST ! stat spb_me/0
b7d4d7
+
b7d4d7
+#Check if stale files are deleted on access
b7d4d7
+TEST ! stat source_deletions_me/fifo
b7d4d7
+TEST ! stat $B0/${V0}0/source_deletions_me/fifo
b7d4d7
+TEST ! stat $B0/${V0}1/source_deletions_me/fifo
b7d4d7
+TEST ! stat source_deletions_me/block
b7d4d7
+TEST ! stat $B0/${V0}0/source_deletions_me/block
b7d4d7
+TEST ! stat $B0/${V0}1/source_deletions_me/block
b7d4d7
+TEST ! stat source_deletions_me/char
b7d4d7
+TEST ! stat $B0/${V0}0/source_deletions_me/char
b7d4d7
+TEST ! stat $B0/${V0}1/source_deletions_me/char
b7d4d7
+TEST ! stat source_deletions_me/file
b7d4d7
+TEST ! stat $B0/${V0}0/source_deletions_me/file
b7d4d7
+TEST ! stat $B0/${V0}1/source_deletions_me/file
b7d4d7
+TEST ! stat source_deletions_me/file
b7d4d7
+TEST ! stat $B0/${V0}0/source_deletions_me/file
b7d4d7
+TEST ! stat $B0/${V0}1/source_deletions_me/file
b7d4d7
+TEST ! stat source_deletions_me/dir1/dir2
b7d4d7
+TEST ! stat $B0/${V0}0/source_deletions_me/dir1/dir2
b7d4d7
+TEST ! stat $B0/${V0}1/source_deletions_me/dir1/dir2
b7d4d7
+TEST ! stat source_deletions_me/dir1
b7d4d7
+TEST ! stat $B0/${V0}0/source_deletions_me/dir1
b7d4d7
+TEST ! stat $B0/${V0}1/source_deletions_me/dir1
b7d4d7
+
b7d4d7
+#Test if the files created as part of access are healed correctly
b7d4d7
+r=$(get_file_type source_creations_me/fifo)
b7d4d7
+EXPECT "$r" get_file_type $B0/${V0}0/source_creations_me/fifo
b7d4d7
+EXPECT "$r" get_file_type $B0/${V0}1/source_creations_me/fifo
b7d4d7
+TEST [ -p source_creations_me/fifo ]
b7d4d7
+
b7d4d7
+r=$(get_file_type source_creations_me/block)
b7d4d7
+EXPECT "$r" get_file_type $B0/${V0}0/source_creations_me/block
b7d4d7
+EXPECT "$r" get_file_type $B0/${V0}1/source_creations_me/block
b7d4d7
+EXPECT "^4 5$" stat -c "%t %T" $B0/${V0}1/source_creations_me/block
b7d4d7
+EXPECT "^4 5$" stat -c "%t %T" $B0/${V0}0/source_creations_me/block
b7d4d7
+TEST [ -b source_creations_me/block ]
b7d4d7
+
b7d4d7
+r=$(get_file_type source_creations_me/char)
b7d4d7
+EXPECT "$r" get_file_type $B0/${V0}0/source_creations_me/char
b7d4d7
+EXPECT "$r" get_file_type $B0/${V0}1/source_creations_me/char
b7d4d7
+EXPECT "^1 5$" stat -c "%t %T" $B0/${V0}1/source_creations_me/char
b7d4d7
+EXPECT "^1 5$" stat -c "%t %T" $B0/${V0}0/source_creations_me/char
b7d4d7
+TEST [ -c source_creations_me/char ]
b7d4d7
+
b7d4d7
+r=$(get_file_type source_creations_me/file)
b7d4d7
+EXPECT "$r" get_file_type $B0/${V0}0/source_creations_me/file
b7d4d7
+EXPECT "$r" get_file_type $B0/${V0}1/source_creations_me/file
b7d4d7
+TEST [ -f source_creations_me/file ]
b7d4d7
+
b7d4d7
+r=$(get_file_type source_creations_me/slink)
b7d4d7
+EXPECT "$r" get_file_type $B0/${V0}0/source_creations_me/slink
b7d4d7
+EXPECT "$r" get_file_type $B0/${V0}1/source_creations_me/slink
b7d4d7
+TEST [ -h source_creations_me/slink ]
b7d4d7
+
b7d4d7
+r=$(get_file_type source_creations_me/dir1/dir2)
b7d4d7
+EXPECT "$r" get_file_type $B0/${V0}0/source_creations_me/dir1/dir2
b7d4d7
+EXPECT "$r" get_file_type $B0/${V0}1/source_creations_me/dir1/dir2
b7d4d7
+TEST [ -d source_creations_me/dir1/dir2 ]
b7d4d7
+
b7d4d7
+r=$(get_file_type source_creations_me/dir1)
b7d4d7
+EXPECT "$r" get_file_type $B0/${V0}0/source_creations_me/dir1
b7d4d7
+EXPECT "$r" get_file_type $B0/${V0}1/source_creations_me/dir1
b7d4d7
+TEST [ -d source_creations_me/dir1 ]
b7d4d7
+
b7d4d7
+#Trigger heal and check _heal dirs are healed properly
b7d4d7
+#Trigger change in event generation number. That way inodes would get refreshed during lookup
b7d4d7
+TEST kill_brick $V0 $H0 $B0/${V0}1
b7d4d7
+$CLI volume start $V0 force
b7d4d7
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
b7d4d7
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
b7d4d7
+
b7d4d7
+TEST stat spb_heal
b7d4d7
+TEST stat spb_me_heal
b7d4d7
+TEST stat fool_heal
b7d4d7
+TEST stat fool_me
b7d4d7
+TEST stat v1_fool_heal
b7d4d7
+TEST stat v1_fool_me
b7d4d7
+TEST stat source_deletions_heal
b7d4d7
+TEST stat source_deletions_me
b7d4d7
+TEST stat source_self_accusing
b7d4d7
+TEST stat source_creations_heal
b7d4d7
+TEST stat source_creations_me
b7d4d7
+TEST stat v1_dirty_heal
b7d4d7
+TEST stat v1_dirty_me
b7d4d7
+TEST $CLI volume stop $V0
b7d4d7
+TEST rm -rf $B0/${V0}{0,1}/.glusterfs/indices/xattrop/*
b7d4d7
+
b7d4d7
+$CLI volume start $V0
b7d4d7
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
b7d4d7
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
b7d4d7
+
b7d4d7
+#Create base entry in indices/xattrop
b7d4d7
+echo "Data" > $M0/FILE
b7d4d7
+rm -f $M0/FILE
b7d4d7
+EXPECT "1" count_index_entries $B0/${V0}0
b7d4d7
+EXPECT "1" count_index_entries $B0/${V0}1
b7d4d7
+
b7d4d7
+TEST $CLI volume stop $V0;
b7d4d7
+
b7d4d7
+#Create entries for fool_heal and fool_me to ensure they are fully healed and dirty xattrs erased, before triggering index heal
b7d4d7
+create_brick_xattrop_entry $B0/${V0}0 fool_heal fool_me source_creations_heal/dir1
b7d4d7
+
b7d4d7
+$CLI volume start $V0
b7d4d7
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
b7d4d7
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
b7d4d7
+
b7d4d7
+$CLI volume heal $V0 enable
b7d4d7
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
b7d4d7
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
b7d4d7
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
b7d4d7
+
b7d4d7
+TEST $CLI volume heal $V0;
b7d4d7
+EXPECT_WITHIN $HEAL_TIMEOUT "~" print_pending_heals spb_heal spb_me_heal fool_heal fool_me v1_fool_heal v1_fool_me source_deletions_heal source_deletions_me source_creations_heal source_creations_me v1_dirty_heal v1_dirty_me source_self_accusing
b7d4d7
+
b7d4d7
+EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 spb_heal
b7d4d7
+EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 spb_me_heal
b7d4d7
+EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 fool_heal
b7d4d7
+EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 fool_me
b7d4d7
+EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 v1_fool_heal
b7d4d7
+EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 v1_fool_me
b7d4d7
+EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 source_deletions_heal
b7d4d7
+EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 source_deletions_me
b7d4d7
+EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 source_self_accusing
b7d4d7
+EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 source_creations_heal
b7d4d7
+EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 source_creations_me
b7d4d7
+EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 v1_dirty_heal
b7d4d7
+EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 v1_dirty_me
b7d4d7
+
b7d4d7
+#Don't access the files/dirs from mount point as that may cause self-heals
b7d4d7
+# Check if conservative merges happened correctly on heal dirs
b7d4d7
+TEST stat $B0/${V0}0/spb_heal/1
b7d4d7
+TEST stat $B0/${V0}1/spb_heal/1
b7d4d7
+
b7d4d7
+TEST stat $B0/${V0}0/spb_heal/0
b7d4d7
+TEST stat $B0/${V0}1/spb_heal/0
b7d4d7
+
b7d4d7
+TEST stat $B0/${V0}0/fool_heal/1
b7d4d7
+TEST stat $B0/${V0}1/fool_heal/1
b7d4d7
+
b7d4d7
+TEST stat $B0/${V0}0/fool_heal/0
b7d4d7
+TEST stat $B0/${V0}1/fool_heal/0
b7d4d7
+
b7d4d7
+TEST stat $B0/${V0}0/v1_fool_heal/0
b7d4d7
+TEST stat $B0/${V0}1/v1_fool_heal/0
b7d4d7
+
b7d4d7
+TEST stat $B0/${V0}0/v1_fool_heal/1
b7d4d7
+TEST stat $B0/${V0}1/v1_fool_heal/1
b7d4d7
+
b7d4d7
+TEST stat $B0/${V0}0/v1_dirty_heal/0
b7d4d7
+TEST stat $B0/${V0}1/v1_dirty_heal/0
b7d4d7
+
b7d4d7
+#Check if files that have gfid-mismatches in spb are giving EIO
b7d4d7
+TEST ! stat spb/0
b7d4d7
+
b7d4d7
+#Check if stale files are deleted on access
b7d4d7
+TEST ! stat $B0/${V0}0/source_deletions_heal/fifo
b7d4d7
+TEST ! stat $B0/${V0}1/source_deletions_heal/fifo
b7d4d7
+TEST ! stat $B0/${V0}0/source_deletions_heal/block
b7d4d7
+TEST ! stat $B0/${V0}1/source_deletions_heal/block
b7d4d7
+TEST ! stat $B0/${V0}0/source_deletions_heal/char
b7d4d7
+TEST ! stat $B0/${V0}1/source_deletions_heal/char
b7d4d7
+TEST ! stat $B0/${V0}0/source_deletions_heal/file
b7d4d7
+TEST ! stat $B0/${V0}1/source_deletions_heal/file
b7d4d7
+TEST ! stat $B0/${V0}0/source_deletions_heal/file
b7d4d7
+TEST ! stat $B0/${V0}1/source_deletions_heal/file
b7d4d7
+TEST ! stat $B0/${V0}0/source_deletions_heal/dir1/dir2
b7d4d7
+TEST ! stat $B0/${V0}1/source_deletions_heal/dir1/dir2
b7d4d7
+TEST ! stat $B0/${V0}0/source_deletions_heal/dir1
b7d4d7
+TEST ! stat $B0/${V0}1/source_deletions_heal/dir1
b7d4d7
+
b7d4d7
+#Check if stale files are deleted on access
b7d4d7
+TEST ! stat $B0/${V0}0/source_self_accusing/fifo
b7d4d7
+TEST ! stat $B0/${V0}1/source_self_accusing/fifo
b7d4d7
+TEST ! stat $B0/${V0}0/source_self_accusing/block
b7d4d7
+TEST ! stat $B0/${V0}1/source_self_accusing/block
b7d4d7
+TEST ! stat $B0/${V0}0/source_self_accusing/char
b7d4d7
+TEST ! stat $B0/${V0}1/source_self_accusing/char
b7d4d7
+TEST ! stat $B0/${V0}0/source_self_accusing/file
b7d4d7
+TEST ! stat $B0/${V0}1/source_self_accusing/file
b7d4d7
+TEST ! stat $B0/${V0}0/source_self_accusing/file
b7d4d7
+TEST ! stat $B0/${V0}1/source_self_accusing/file
b7d4d7
+TEST ! stat $B0/${V0}0/source_self_accusing/dir1/dir2
b7d4d7
+TEST ! stat $B0/${V0}1/source_self_accusing/dir1/dir2
b7d4d7
+TEST ! stat $B0/${V0}0/source_self_accusing/dir1
b7d4d7
+TEST ! stat $B0/${V0}1/source_self_accusing/dir1
b7d4d7
+
b7d4d7
+#Test if the files created as part of full self-heal correctly
b7d4d7
+r=$(get_file_type $B0/${V0}0/source_creations_heal/fifo)
b7d4d7
+EXPECT "$r" get_file_type $B0/${V0}1/source_creations_heal/fifo
b7d4d7
+TEST [ -p $B0/${V0}0/source_creations_heal/fifo ]
b7d4d7
+EXPECT "^4 5$" stat -c "%t %T" $B0/${V0}1/source_creations_heal/block
b7d4d7
+EXPECT "^4 5$" stat -c "%t %T" $B0/${V0}0/source_creations_heal/block
b7d4d7
+
b7d4d7
+r=$(get_file_type $B0/${V0}0/source_creations_heal/block)
b7d4d7
+EXPECT "$r" get_file_type $B0/${V0}1/source_creations_heal/block
b7d4d7
+
b7d4d7
+r=$(get_file_type $B0/${V0}0/source_creations_heal/char)
b7d4d7
+EXPECT "$r" get_file_type $B0/${V0}1/source_creations_heal/char
b7d4d7
+EXPECT "^1 5$" stat -c "%t %T" $B0/${V0}1/source_creations_heal/char
b7d4d7
+EXPECT "^1 5$" stat -c "%t %T" $B0/${V0}0/source_creations_heal/char
b7d4d7
+
b7d4d7
+r=$(get_file_type $B0/${V0}0/source_creations_heal/file)
b7d4d7
+EXPECT "$r" get_file_type $B0/${V0}1/source_creations_heal/file
b7d4d7
+TEST [ -f $B0/${V0}0/source_creations_heal/file ]
b7d4d7
+
b7d4d7
+r=$(get_file_type source_creations_heal/file $B0/${V0}0/slink)
b7d4d7
+EXPECT "$r" get_file_type $B0/${V0}1/source_creations_heal/file slink
b7d4d7
+TEST [ -h $B0/${V0}0/source_creations_heal/slink ]
b7d4d7
+
b7d4d7
+r=$(get_file_type $B0/${V0}0/source_creations_heal/dir1/dir2)
b7d4d7
+EXPECT "$r" get_file_type $B0/${V0}1/source_creations_heal/dir1/dir2
b7d4d7
+TEST [ -d $B0/${V0}0/source_creations_heal/dir1/dir2 ]
b7d4d7
+
b7d4d7
+r=$(get_file_type $B0/${V0}0/source_creations_heal/dir1)
b7d4d7
+EXPECT "$r" get_file_type $B0/${V0}1/source_creations_heal/dir1
b7d4d7
+TEST [ -d $B0/${V0}0/source_creations_heal/dir1 ]
b7d4d7
+
b7d4d7
+cd -
b7d4d7
+
b7d4d7
+#Anonymous directory shouldn't be created
b7d4d7
+TEST mkdir $M0/rename-dir
b7d4d7
+before_rename=$(STAT_INO $B0/${V0}1/rename-dir)
b7d4d7
+TEST kill_brick $V0 $H0 $B0/${V0}1
b7d4d7
+TEST mv $M0/rename-dir $M0/new-name
b7d4d7
+TEST $CLI volume start $V0 force
b7d4d7
+#Since features.ctime is not enabled by default in downstream, the below test
b7d4d7
+#will fail. If ctime feature is enabled, there will be trusted.glusterfs.mdata
b7d4d7
+#xattr set which will differ for the parent in the gfid split-brain scenario
b7d4d7
+#and when lookup is triggered, the gfid gets added to indices/xattrop leading
b7d4d7
+#the below test to pass in upstream. Hence commenting it here.
b7d4d7
+#'spb' is in split-brain so pending-heal-count will be 2
b7d4d7
+#EXPECT_WITHIN $HEAL_TIMEOUT "^2$" get_pending_heal_count $V0
b7d4d7
+after_rename=$(STAT_INO $B0/${V0}1/new-name)
b7d4d7
+EXPECT "0" echo $(ls -a $B0/${V0}0/ | grep anonymous-inode | wc -l)
b7d4d7
+EXPECT "0" echo $(ls -a $B0/${V0}1/ | grep anonymous-inode | wc -l)
b7d4d7
+EXPECT_NOT "$before_rename" echo $after_rename
b7d4d7
+cleanup
b7d4d7
diff --git a/tests/basic/afr/rename-data-loss.t b/tests/basic/afr/rename-data-loss.t
b7d4d7
new file mode 100644
b7d4d7
index 0000000..256ee2a
b7d4d7
--- /dev/null
b7d4d7
+++ b/tests/basic/afr/rename-data-loss.t
b7d4d7
@@ -0,0 +1,72 @@
b7d4d7
+#!/bin/bash
b7d4d7
+#Self-heal tests
b7d4d7
+. $(dirname $0)/../../include.rc
b7d4d7
+. $(dirname $0)/../../volume.rc
b7d4d7
+. $(dirname $0)/../../afr.rc
b7d4d7
+
b7d4d7
+cleanup;
b7d4d7
+
b7d4d7
+TEST glusterd
b7d4d7
+TEST pidof glusterd
b7d4d7
+TEST $CLI volume create $V0 replica 2 $H0:$B0/brick{0,1}
b7d4d7
+TEST $CLI volume set $V0 write-behind off
b7d4d7
+TEST $CLI volume set $V0 self-heal-daemon off
b7d4d7
+TEST $CLI volume set $V0 data-self-heal off
b7d4d7
+TEST $CLI volume set $V0 metadata-self-heal off
b7d4d7
+TEST $CLI volume set $V0 entry-self-heal off
b7d4d7
+TEST $CLI volume start $V0
b7d4d7
+EXPECT 'Started' volinfo_field $V0 'Status'
b7d4d7
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0;
b7d4d7
+
b7d4d7
+cd $M0
b7d4d7
+TEST `echo "line1" >> file1`
b7d4d7
+TEST mkdir dir1
b7d4d7
+TEST mkdir dir2
b7d4d7
+TEST mkdir -p dir1/dira/dirb
b7d4d7
+TEST `echo "line1">>dir1/dira/dirb/file1`
b7d4d7
+TEST mkdir delete_me
b7d4d7
+TEST `echo "line1" >> delete_me/file1`
b7d4d7
+
b7d4d7
+#brick0 has witnessed the second write while brick1 is down.
b7d4d7
+TEST kill_brick $V0 $H0 $B0/brick1
b7d4d7
+TEST `echo "line2" >> file1`
b7d4d7
+TEST `echo "line2" >> dir1/dira/dirb/file1`
b7d4d7
+TEST `echo "line2" >> delete_me/file1`
b7d4d7
+
b7d4d7
+#Toggle the bricks that are up/down.
b7d4d7
+TEST $CLI volume start $V0 force
b7d4d7
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1
b7d4d7
+TEST kill_brick $V0 $H0 $B0/brick0
b7d4d7
+
b7d4d7
+#Rename when the 'source' brick0 for data-selfheals is down.
b7d4d7
+mv file1 file2
b7d4d7
+mv dir1/dira dir2
b7d4d7
+
b7d4d7
+#Delete a dir when brick0 is down.
b7d4d7
+rm -rf delete_me
b7d4d7
+cd -
b7d4d7
+
b7d4d7
+#Bring everything up and trigger heal
b7d4d7
+TEST $CLI volume set $V0 self-heal-daemon on
b7d4d7
+TEST $CLI volume start $V0 force
b7d4d7
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
b7d4d7
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
b7d4d7
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
b7d4d7
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
b7d4d7
+TEST $CLI volume heal $V0
b7d4d7
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
b7d4d7
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" afr_anon_entry_count $B0/brick0
b7d4d7
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" afr_anon_entry_count $B0/brick1
b7d4d7
+
b7d4d7
+#Remount to avoid reading from caches
b7d4d7
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
b7d4d7
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0;
b7d4d7
+EXPECT "line2" tail -1 $M0/file2
b7d4d7
+EXPECT "line2" tail -1 $M0/dir2/dira/dirb/file1
b7d4d7
+TEST ! stat $M0/delete_me/file1
b7d4d7
+TEST ! stat $M0/delete_me
b7d4d7
+
b7d4d7
+anon_inode_name=$(ls -a $B0/brick0 | grep glusterfs-anonymous-inode)
b7d4d7
+TEST [[ -d $B0/brick0/$anon_inode_name ]]
b7d4d7
+TEST [[ -d $B0/brick1/$anon_inode_name ]]
b7d4d7
+cleanup
b7d4d7
diff --git a/tests/bugs/replicate/bug-1744548-heal-timeout.t b/tests/bugs/replicate/bug-1744548-heal-timeout.t
b7d4d7
index c208112..0115350 100644
b7d4d7
--- a/tests/bugs/replicate/bug-1744548-heal-timeout.t
b7d4d7
+++ b/tests/bugs/replicate/bug-1744548-heal-timeout.t
b7d4d7
@@ -25,14 +25,14 @@ TEST ! $CLI volume heal $V0
b7d4d7
 TEST $CLI volume profile $V0 start
b7d4d7
 TEST $CLI volume profile $V0 info clear
b7d4d7
 TEST $CLI volume heal $V0 enable
b7d4d7
-# Each brick does 3 opendirs, corresponding to dirty, xattrop and entry-changes
b7d4d7
-EXPECT_WITHIN $HEAL_TIMEOUT "^333$" get_cumulative_opendir_count
b7d4d7
+# Each brick does 4 opendirs, corresponding to dirty, xattrop and entry-changes, anonymous-inode
b7d4d7
+EXPECT_WITHIN 4 "^444$" get_cumulative_opendir_count
b7d4d7
 
b7d4d7
 # Check that a change in heal-timeout is honoured immediately.
b7d4d7
 TEST $CLI volume set $V0 cluster.heal-timeout 5
b7d4d7
 sleep 10
b7d4d7
 # Two crawls must have happened.
b7d4d7
-EXPECT_WITHIN $HEAL_TIMEOUT "^999$" get_cumulative_opendir_count
b7d4d7
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^121212$" get_cumulative_opendir_count
b7d4d7
 
b7d4d7
 # shd must not heal if it is disabled and heal-timeout is changed.
b7d4d7
 TEST $CLI volume heal $V0 disable
b7d4d7
diff --git a/tests/features/trash.t b/tests/features/trash.t
b7d4d7
index 472e909..da5b50b 100755
b7d4d7
--- a/tests/features/trash.t
b7d4d7
+++ b/tests/features/trash.t
b7d4d7
@@ -94,105 +94,105 @@ wildcard_not_exists() {
b7d4d7
         if [ $? -eq 0 ]; then echo "Y"; else echo "N"; fi
b7d4d7
 }
b7d4d7
 
b7d4d7
-# testing glusterd [1-3]
b7d4d7
+# testing glusterd
b7d4d7
 TEST glusterd
b7d4d7
 TEST pidof glusterd
b7d4d7
 TEST $CLI volume info
b7d4d7
 
b7d4d7
-# creating distributed volume [4]
b7d4d7
+# creating distributed volume
b7d4d7
 TEST $CLI volume create $V0 $H0:$B0/${V0}{1,2}
b7d4d7
 
b7d4d7
-# checking volume status [5-7]
b7d4d7
+# checking volume status
b7d4d7
 EXPECT "$V0" volinfo_field $V0 'Volume Name'
b7d4d7
 EXPECT 'Created' volinfo_field $V0 'Status'
b7d4d7
 EXPECT '2' brick_count $V0
b7d4d7
 
b7d4d7
-# test without enabling trash translator [8]
b7d4d7
+# test without enabling trash translator
b7d4d7
 TEST start_vol $V0 $M0
b7d4d7
 
b7d4d7
-# test on enabling trash translator [9-10]
b7d4d7
+# test on enabling trash translator
b7d4d7
 TEST $CLI volume set $V0 features.trash on
b7d4d7
 EXPECT 'on' volinfo_field $V0 'features.trash'
b7d4d7
 
b7d4d7
-# files directly under mount point [11]
b7d4d7
+# files directly under mount point
b7d4d7
 create_files $M0/file1 $M0/file2
b7d4d7
 TEST file_exists $V0 file1 file2
b7d4d7
 
b7d4d7
-# perform unlink [12]
b7d4d7
+# perform unlink
b7d4d7
 TEST unlink_op file1
b7d4d7
 
b7d4d7
-# perform truncate [13]
b7d4d7
+# perform truncate
b7d4d7
 TEST truncate_op file2 4
b7d4d7
 
b7d4d7
-# create files directory hierarchy and check [14]
b7d4d7
+# create files directory hierarchy and check
b7d4d7
 mkdir -p $M0/1/2/3
b7d4d7
 create_files $M0/1/2/3/foo1 $M0/1/2/3/foo2
b7d4d7
 TEST file_exists $V0 1/2/3/foo1 1/2/3/foo2
b7d4d7
 
b7d4d7
-# perform unlink [15]
b7d4d7
+# perform unlink
b7d4d7
 TEST unlink_op 1/2/3/foo1
b7d4d7
 
b7d4d7
-# perform truncate [16]
b7d4d7
+# perform truncate
b7d4d7
 TEST truncate_op 1/2/3/foo2 4
b7d4d7
 
b7d4d7
 # create a directory for eliminate pattern
b7d4d7
 mkdir $M0/a
b7d4d7
 
b7d4d7
-# set the eliminate pattern [17-18]
b7d4d7
+# set the eliminate pattern
b7d4d7
 TEST $CLI volume set $V0 features.trash-eliminate-path /a
b7d4d7
 EXPECT '/a' volinfo_field $V0 'features.trash-eliminate-path'
b7d4d7
 
b7d4d7
-# create two files and check [19]
b7d4d7
+# create two files and check
b7d4d7
 create_files $M0/a/test1 $M0/a/test2
b7d4d7
 TEST file_exists $V0 a/test1 a/test2
b7d4d7
 
b7d4d7
-# remove from eliminate pattern [20]
b7d4d7
+# remove from eliminate pattern
b7d4d7
 rm -f $M0/a/test1
b7d4d7
 EXPECT "Y" wildcard_not_exists $M0/.trashcan/a/test1*
b7d4d7
 
b7d4d7
-# truncate from eliminate path [21-23]
b7d4d7
+# truncate from eliminate path
b7d4d7
 truncate -s 2 $M0/a/test2
b7d4d7
 TEST [ -e $M0/a/test2 ]
b7d4d7
 TEST [ `ls -l $M0/a/test2 | awk '{print $5}'` -eq 2 ]
b7d4d7
 EXPECT "Y" wildcard_not_exists $M0/.trashcan/a/test2*
b7d4d7
 
b7d4d7
-# set internal op on [24-25]
b7d4d7
+# set internal op on
b7d4d7
 TEST $CLI volume set $V0 features.trash-internal-op on
b7d4d7
 EXPECT 'on' volinfo_field $V0 'features.trash-internal-op'
b7d4d7
 
b7d4d7
-# again create two files and check [26]
b7d4d7
+# again create two files and check
b7d4d7
 create_files $M0/inop1 $M0/inop2
b7d4d7
 TEST file_exists $V0 inop1 inop2
b7d4d7
 
b7d4d7
-# perform unlink [27]
b7d4d7
+# perform unlink
b7d4d7
 TEST unlink_op inop1
b7d4d7
 
b7d4d7
-# perform truncate [28]
b7d4d7
+# perform truncate
b7d4d7
 TEST truncate_op inop2 4
b7d4d7
 
b7d4d7
-# remove one brick and restart the volume [28-31]
b7d4d7
+# remove one brick and restart the volume
b7d4d7
 TEST $CLI volume remove-brick $V0 $H0:$B0/${V0}2 force
b7d4d7
 EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
b7d4d7
 TEST $CLI volume stop $V0
b7d4d7
 TEST start_vol $V0 $M0 $M0/.trashcan
b7d4d7
 
b7d4d7
-# again create two files and check [33]
b7d4d7
+# again create two files and check
b7d4d7
 create_files $M0/rebal1 $M0/rebal2
b7d4d7
 TEST file_exists $V0 rebal1 rebal2
b7d4d7
 
b7d4d7
-# add one brick [34-35]
b7d4d7
+# add one brick
b7d4d7
 TEST $CLI volume add-brick $V0 $H0:$B0/${V0}3
b7d4d7
 TEST [ -d $B0/${V0}3 ]
b7d4d7
 
b7d4d7
 
b7d4d7
-# perform rebalance [36]
b7d4d7
+# perform rebalance
b7d4d7
 TEST $CLI volume rebalance $V0 start force
b7d4d7
 EXPECT_WITHIN $REBALANCE_TIMEOUT "0" rebalance_completed
b7d4d7
 
b7d4d7
 #Find out which file was migrated to the new brick
b7d4d7
 file_name=$(ls $B0/${V0}3/rebal*| xargs basename)
b7d4d7
 
b7d4d7
-# check whether rebalance was succesful [37-40]
b7d4d7
+# check whether rebalance was succesful
b7d4d7
 EXPECT "Y" wildcard_exists $B0/${V0}3/$file_name*
b7d4d7
 EXPECT "Y" wildcard_exists $B0/${V0}1/.trashcan/internal_op/$file_name*
b7d4d7
 
b7d4d7
@@ -201,52 +201,42 @@ EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
b7d4d7
 # force required in case rebalance is not over
b7d4d7
 TEST $CLI volume stop $V0 force
b7d4d7
 
b7d4d7
-# create a replicated volume [41]
b7d4d7
+# create a replicated volume
b7d4d7
 TEST $CLI volume create $V1 replica 2 $H0:$B0/${V1}{1,2}
b7d4d7
 
b7d4d7
-# checking volume status [42-45]
b7d4d7
+# checking volume status
b7d4d7
 EXPECT "$V1" volinfo_field $V1 'Volume Name'
b7d4d7
 EXPECT 'Replicate' volinfo_field $V1 'Type'
b7d4d7
 EXPECT 'Created' volinfo_field $V1 'Status'
b7d4d7
 EXPECT '2' brick_count $V1
b7d4d7
 
b7d4d7
-# enable trash with options and start the replicate volume by disabling automatic self-heal [46-50]
b7d4d7
+# enable trash with options and start the replicate volume by disabling automatic self-heal
b7d4d7
 TEST $CLI volume set $V1 features.trash on
b7d4d7
 TEST $CLI volume set $V1 features.trash-internal-op on
b7d4d7
 EXPECT 'on' volinfo_field $V1 'features.trash'
b7d4d7
 EXPECT 'on' volinfo_field $V1 'features.trash-internal-op'
b7d4d7
 TEST start_vol $V1 $M1 $M1/.trashcan
b7d4d7
 
b7d4d7
-# mount and check for trash directory [51]
b7d4d7
+# mount and check for trash directory
b7d4d7
 TEST [ -d $M1/.trashcan/internal_op ]
b7d4d7
 
b7d4d7
-# create a file and check [52]
b7d4d7
+# create a file and check
b7d4d7
 touch $M1/self
b7d4d7
 TEST [ -e $B0/${V1}1/self -a -e $B0/${V1}2/self ]
b7d4d7
 
b7d4d7
-# kill one brick and delete the file from mount point [53-54]
b7d4d7
+# kill one brick and delete the file from mount point
b7d4d7
 kill_brick $V1 $H0 $B0/${V1}1
b7d4d7
 EXPECT_WITHIN ${PROCESS_UP_TIMEOUT} "1" online_brick_count
b7d4d7
 rm -f $M1/self
b7d4d7
 EXPECT "Y" wildcard_exists $B0/${V1}2/.trashcan/self*
b7d4d7
 
b7d4d7
-# force start the volume and trigger the self-heal manually [55-57]
b7d4d7
-TEST $CLI volume start $V1 force
b7d4d7
-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "2" online_brick_count
b7d4d7
-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
b7d4d7
-# Since we created the file under root of the volume, it will be
b7d4d7
-# healed automatically
b7d4d7
-
b7d4d7
-# check for the removed file in trashcan [58]
b7d4d7
-EXPECT_WITHIN $HEAL_TIMEOUT "Y" wildcard_exists $B0/${V1}1/.trashcan/internal_op/self*
b7d4d7
-
b7d4d7
-# check renaming of trash directory through cli [59-62]
b7d4d7
+# check renaming of trash directory through cli
b7d4d7
 TEST $CLI volume set $V0 trash-dir abc
b7d4d7
 TEST start_vol $V0 $M0 $M0/abc
b7d4d7
 TEST [ -e $M0/abc -a ! -e $M0/.trashcan ]
b7d4d7
 EXPECT "Y" wildcard_exists $B0/${V0}1/abc/internal_op/rebal*
b7d4d7
 
b7d4d7
-# ensure that rename and delete operation on trash directory fails [63-65]
b7d4d7
+# ensure that rename and delete operation on trash directory fails
b7d4d7
 rm -rf $M0/abc/internal_op
b7d4d7
 TEST [ -e $M0/abc/internal_op ]
b7d4d7
 rm -rf $M0/abc/
b7d4d7
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
b7d4d7
index 90b4f14..6f2da11 100644
b7d4d7
--- a/xlators/cluster/afr/src/afr-common.c
b7d4d7
+++ b/xlators/cluster/afr/src/afr-common.c
b7d4d7
@@ -47,6 +47,41 @@ afr_quorum_errno(afr_private_t *priv)
b7d4d7
     return ENOTCONN;
b7d4d7
 }
b7d4d7
 
b7d4d7
+gf_boolean_t
b7d4d7
+afr_is_private_directory(afr_private_t *priv, uuid_t pargfid, const char *name,
b7d4d7
+                         pid_t pid)
b7d4d7
+{
b7d4d7
+    if (!__is_root_gfid(pargfid)) {
b7d4d7
+        return _gf_false;
b7d4d7
+    }
b7d4d7
+
b7d4d7
+    if (strcmp(name, GF_REPLICATE_TRASH_DIR) == 0) {
b7d4d7
+        /*For backward compatibility /.landfill is private*/
b7d4d7
+        return _gf_true;
b7d4d7
+    }
b7d4d7
+
b7d4d7
+    if (pid == GF_CLIENT_PID_GSYNCD) {
b7d4d7
+        /*geo-rep needs to create/sync private directory on slave because
b7d4d7
+         * it appears in changelog*/
b7d4d7
+        return _gf_false;
b7d4d7
+    }
b7d4d7
+
b7d4d7
+    if (pid == GF_CLIENT_PID_GLFS_HEAL || pid == GF_CLIENT_PID_SELF_HEALD) {
b7d4d7
+        if (strcmp(name, priv->anon_inode_name) == 0) {
b7d4d7
+            /* anonymous-inode dir is private*/
b7d4d7
+            return _gf_true;
b7d4d7
+        }
b7d4d7
+    } else {
b7d4d7
+        if (strncmp(name, AFR_ANON_DIR_PREFIX, strlen(AFR_ANON_DIR_PREFIX)) ==
b7d4d7
+            0) {
b7d4d7
+            /* anonymous-inode dir prefix is private for geo-rep to work*/
b7d4d7
+            return _gf_true;
b7d4d7
+        }
b7d4d7
+    }
b7d4d7
+
b7d4d7
+    return _gf_false;
b7d4d7
+}
b7d4d7
+
b7d4d7
 int
b7d4d7
 afr_fav_child_reset_sink_xattrs(void *opaque);
b7d4d7
 
b7d4d7
@@ -3301,11 +3336,10 @@ afr_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req)
b7d4d7
         return 0;
b7d4d7
     }
b7d4d7
 
b7d4d7
-    if (__is_root_gfid(loc->parent->gfid)) {
b7d4d7
-        if (!strcmp(loc->name, GF_REPLICATE_TRASH_DIR)) {
b7d4d7
-            op_errno = EPERM;
b7d4d7
-            goto out;
b7d4d7
-        }
b7d4d7
+    if (afr_is_private_directory(this->private, loc->parent->gfid, loc->name,
b7d4d7
+                                 frame->root->pid)) {
b7d4d7
+        op_errno = EPERM;
b7d4d7
+        goto out;
b7d4d7
     }
b7d4d7
 
b7d4d7
     local = AFR_FRAME_INIT(frame, op_errno);
b7d4d7
@@ -4832,6 +4866,7 @@ afr_priv_dump(xlator_t *this)
b7d4d7
                        priv->background_self_heal_count);
b7d4d7
     gf_proc_dump_write("healers", "%d", priv->healers);
b7d4d7
     gf_proc_dump_write("read-hash-mode", "%d", priv->hash_mode);
b7d4d7
+    gf_proc_dump_write("use-anonymous-inode", "%d", priv->use_anon_inode);
b7d4d7
     if (priv->quorum_count == AFR_QUORUM_AUTO) {
b7d4d7
         gf_proc_dump_write("quorum-type", "auto");
b7d4d7
     } else if (priv->quorum_count == 0) {
b7d4d7
@@ -5792,6 +5827,7 @@ afr_priv_destroy(afr_private_t *priv)
b7d4d7
     GF_FREE(priv->local);
b7d4d7
     GF_FREE(priv->pending_key);
b7d4d7
     GF_FREE(priv->children);
b7d4d7
+    GF_FREE(priv->anon_inode);
b7d4d7
     GF_FREE(priv->child_up);
b7d4d7
     GF_FREE(priv->child_latency);
b7d4d7
     LOCK_DESTROY(&priv->lock);
b7d4d7
diff --git a/xlators/cluster/afr/src/afr-dir-read.c b/xlators/cluster/afr/src/afr-dir-read.c
b7d4d7
index 6307b63..d64b6a9 100644
b7d4d7
--- a/xlators/cluster/afr/src/afr-dir-read.c
b7d4d7
+++ b/xlators/cluster/afr/src/afr-dir-read.c
b7d4d7
@@ -158,8 +158,8 @@ afr_validate_read_subvol(inode_t *inode, xlator_t *this, int par_read_subvol)
b7d4d7
 }
b7d4d7
 
b7d4d7
 static void
b7d4d7
-afr_readdir_transform_entries(gf_dirent_t *subvol_entries, int subvol,
b7d4d7
-                              gf_dirent_t *entries, fd_t *fd)
b7d4d7
+afr_readdir_transform_entries(call_frame_t *frame, gf_dirent_t *subvol_entries,
b7d4d7
+                              int subvol, gf_dirent_t *entries, fd_t *fd)
b7d4d7
 {
b7d4d7
     int ret = -1;
b7d4d7
     gf_dirent_t *entry = NULL;
b7d4d7
@@ -177,8 +177,8 @@ afr_readdir_transform_entries(gf_dirent_t *subvol_entries, int subvol,
b7d4d7
 
b7d4d7
     list_for_each_entry_safe(entry, tmp, &subvol_entries->list, list)
b7d4d7
     {
b7d4d7
-        if (__is_root_gfid(fd->inode->gfid) &&
b7d4d7
-            !strcmp(entry->d_name, GF_REPLICATE_TRASH_DIR)) {
b7d4d7
+        if (afr_is_private_directory(priv, fd->inode->gfid, entry->d_name,
b7d4d7
+                                     frame->root->pid)) {
b7d4d7
             continue;
b7d4d7
         }
b7d4d7
 
b7d4d7
@@ -222,8 +222,8 @@ afr_readdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
b7d4d7
     }
b7d4d7
 
b7d4d7
     if (op_ret >= 0)
b7d4d7
-        afr_readdir_transform_entries(subvol_entries, (long)cookie, &entries,
b7d4d7
-                                      local->fd);
b7d4d7
+        afr_readdir_transform_entries(frame, subvol_entries, (long)cookie,
b7d4d7
+                                      &entries, local->fd);
b7d4d7
 
b7d4d7
     AFR_STACK_UNWIND(readdir, frame, op_ret, op_errno, &entries, xdata);
b7d4d7
 
b7d4d7
diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c
b7d4d7
index 9b6575f..0a8a7fd 100644
b7d4d7
--- a/xlators/cluster/afr/src/afr-self-heal-common.c
b7d4d7
+++ b/xlators/cluster/afr/src/afr-self-heal-common.c
b7d4d7
@@ -2753,3 +2753,185 @@ afr_choose_source_by_policy(afr_private_t *priv, unsigned char *sources,
b7d4d7
 out:
b7d4d7
     return source;
b7d4d7
 }
b7d4d7
+
b7d4d7
+static int
b7d4d7
+afr_anon_inode_mkdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
b7d4d7
+                         int32_t op_ret, int32_t op_errno, inode_t *inode,
b7d4d7
+                         struct iatt *buf, struct iatt *preparent,
b7d4d7
+                         struct iatt *postparent, dict_t *xdata)
b7d4d7
+{
b7d4d7
+    afr_local_t *local = frame->local;
b7d4d7
+    int i = (long)cookie;
b7d4d7
+
b7d4d7
+    local->replies[i].valid = 1;
b7d4d7
+    local->replies[i].op_ret = op_ret;
b7d4d7
+    local->replies[i].op_errno = op_errno;
b7d4d7
+    if (op_ret == 0) {
b7d4d7
+        local->op_ret = 0;
b7d4d7
+        local->replies[i].poststat = *buf;
b7d4d7
+        local->replies[i].preparent = *preparent;
b7d4d7
+        local->replies[i].postparent = *postparent;
b7d4d7
+    }
b7d4d7
+    if (xdata) {
b7d4d7
+        local->replies[i].xdata = dict_ref(xdata);
b7d4d7
+    }
b7d4d7
+
b7d4d7
+    syncbarrier_wake(&local->barrier);
b7d4d7
+    return 0;
b7d4d7
+}
b7d4d7
+
b7d4d7
+int
b7d4d7
+afr_anon_inode_create(xlator_t *this, int child, inode_t **linked_inode)
b7d4d7
+{
b7d4d7
+    call_frame_t *frame = NULL;
b7d4d7
+    afr_local_t *local = NULL;
b7d4d7
+    afr_private_t *priv = this->private;
b7d4d7
+    unsigned char *mkdir_on = alloca0(priv->child_count);
b7d4d7
+    unsigned char *lookup_on = alloca0(priv->child_count);
b7d4d7
+    loc_t loc = {0};
b7d4d7
+    int32_t op_errno = 0;
b7d4d7
+    int32_t child_op_errno = 0;
b7d4d7
+    struct iatt iatt = {0};
b7d4d7
+    dict_t *xdata = NULL;
b7d4d7
+    uuid_t anon_inode_gfid = {0};
b7d4d7
+    int mkdir_count = 0;
b7d4d7
+    int i = 0;
b7d4d7
+
b7d4d7
+    /*Try to mkdir everywhere and return success if the dir exists on 'child'
b7d4d7
+     */
b7d4d7
+
b7d4d7
+    if (!priv->use_anon_inode) {
b7d4d7
+        op_errno = EINVAL;
b7d4d7
+        goto out;
b7d4d7
+    }
b7d4d7
+
b7d4d7
+    frame = afr_frame_create(this, &op_errno);
b7d4d7
+    if (op_errno) {
b7d4d7
+        goto out;
b7d4d7
+    }
b7d4d7
+    local = frame->local;
b7d4d7
+    if (!local->child_up[child]) {
b7d4d7
+        /*Other bricks may need mkdir so don't error out yet*/
b7d4d7
+        child_op_errno = ENOTCONN;
b7d4d7
+    }
b7d4d7
+    gf_uuid_parse(priv->anon_gfid_str, anon_inode_gfid);
b7d4d7
+    for (i = 0; i < priv->child_count; i++) {
b7d4d7
+        if (!local->child_up[i])
b7d4d7
+            continue;
b7d4d7
+
b7d4d7
+        if (priv->anon_inode[i]) {
b7d4d7
+            mkdir_on[i] = 0;
b7d4d7
+        } else {
b7d4d7
+            mkdir_on[i] = 1;
b7d4d7
+            mkdir_count++;
b7d4d7
+        }
b7d4d7
+    }
b7d4d7
+
b7d4d7
+    if (mkdir_count == 0) {
b7d4d7
+        *linked_inode = inode_find(this->itable, anon_inode_gfid);
b7d4d7
+        if (*linked_inode) {
b7d4d7
+            op_errno = 0;
b7d4d7
+            goto out;
b7d4d7
+        }
b7d4d7
+    }
b7d4d7
+
b7d4d7
+    loc.parent = inode_ref(this->itable->root);
b7d4d7
+    loc.name = priv->anon_inode_name;
b7d4d7
+    loc.inode = inode_new(this->itable);
b7d4d7
+    if (!loc.inode) {
b7d4d7
+        op_errno = ENOMEM;
b7d4d7
+        goto out;
b7d4d7
+    }
b7d4d7
+
b7d4d7
+    xdata = dict_new();
b7d4d7
+    if (!xdata) {
b7d4d7
+        op_errno = ENOMEM;
b7d4d7
+        goto out;
b7d4d7
+    }
b7d4d7
+
b7d4d7
+    op_errno = -dict_set_gfuuid(xdata, "gfid-req", anon_inode_gfid, _gf_true);
b7d4d7
+    if (op_errno) {
b7d4d7
+        goto out;
b7d4d7
+    }
b7d4d7
+
b7d4d7
+    if (mkdir_count == 0) {
b7d4d7
+        memcpy(lookup_on, local->child_up, priv->child_count);
b7d4d7
+        goto lookup;
b7d4d7
+    }
b7d4d7
+
b7d4d7
+    AFR_ONLIST(mkdir_on, frame, afr_anon_inode_mkdir_cbk, mkdir, &loc, 0755, 0,
b7d4d7
+               xdata);
b7d4d7
+
b7d4d7
+    for (i = 0; i < priv->child_count; i++) {
b7d4d7
+        if (!mkdir_on[i]) {
b7d4d7
+            continue;
b7d4d7
+        }
b7d4d7
+
b7d4d7
+        if (local->replies[i].op_ret == 0) {
b7d4d7
+            priv->anon_inode[i] = 1;
b7d4d7
+            iatt = local->replies[i].poststat;
b7d4d7
+        } else if (local->replies[i].op_ret < 0 &&
b7d4d7
+                   local->replies[i].op_errno == EEXIST) {
b7d4d7
+            lookup_on[i] = 1;
b7d4d7
+        } else if (i == child) {
b7d4d7
+            child_op_errno = local->replies[i].op_errno;
b7d4d7
+        }
b7d4d7
+    }
b7d4d7
+
b7d4d7
+    if (AFR_COUNT(lookup_on, priv->child_count) == 0) {
b7d4d7
+        goto link;
b7d4d7
+    }
b7d4d7
+
b7d4d7
+lookup:
b7d4d7
+    AFR_ONLIST(lookup_on, frame, afr_selfheal_discover_cbk, lookup, &loc,
b7d4d7
+               xdata);
b7d4d7
+    for (i = 0; i < priv->child_count; i++) {
b7d4d7
+        if (!lookup_on[i]) {
b7d4d7
+            continue;
b7d4d7
+        }
b7d4d7
+
b7d4d7
+        if (local->replies[i].op_ret == 0) {
b7d4d7
+            if (gf_uuid_compare(anon_inode_gfid,
b7d4d7
+                                local->replies[i].poststat.ia_gfid) == 0) {
b7d4d7
+                priv->anon_inode[i] = 1;
b7d4d7
+                iatt = local->replies[i].poststat;
b7d4d7
+            } else {
b7d4d7
+                if (i == child)
b7d4d7
+                    child_op_errno = EINVAL;
b7d4d7
+                gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_INVALID_DATA,
b7d4d7
+                       "%s has gfid: %s", priv->anon_inode_name,
b7d4d7
+                       uuid_utoa(local->replies[i].poststat.ia_gfid));
b7d4d7
+            }
b7d4d7
+        } else if (i == child) {
b7d4d7
+            child_op_errno = local->replies[i].op_errno;
b7d4d7
+        }
b7d4d7
+    }
b7d4d7
+link:
b7d4d7
+    if (!gf_uuid_is_null(iatt.ia_gfid)) {
b7d4d7
+        *linked_inode = inode_link(loc.inode, loc.parent, loc.name, &iatt);
b7d4d7
+        if (*linked_inode) {
b7d4d7
+            op_errno = 0;
b7d4d7
+            inode_lookup(*linked_inode);
b7d4d7
+        } else {
b7d4d7
+            op_errno = ENOMEM;
b7d4d7
+        }
b7d4d7
+        goto out;
b7d4d7
+    }
b7d4d7
+
b7d4d7
+out:
b7d4d7
+    if (xdata)
b7d4d7
+        dict_unref(xdata);
b7d4d7
+    loc_wipe(&loc;;
b7d4d7
+    /*child_op_errno takes precedence*/
b7d4d7
+    if (child_op_errno == 0) {
b7d4d7
+        child_op_errno = op_errno;
b7d4d7
+    }
b7d4d7
+
b7d4d7
+    if (child_op_errno && *linked_inode) {
b7d4d7
+        inode_unref(*linked_inode);
b7d4d7
+        *linked_inode = NULL;
b7d4d7
+    }
b7d4d7
+    if (frame)
b7d4d7
+        AFR_STACK_DESTROY(frame);
b7d4d7
+    return -child_op_errno;
b7d4d7
+}
b7d4d7
diff --git a/xlators/cluster/afr/src/afr-self-heal-entry.c b/xlators/cluster/afr/src/afr-self-heal-entry.c
b7d4d7
index 00b5b2d..20b07dd 100644
b7d4d7
--- a/xlators/cluster/afr/src/afr-self-heal-entry.c
b7d4d7
+++ b/xlators/cluster/afr/src/afr-self-heal-entry.c
b7d4d7
@@ -16,54 +16,170 @@
b7d4d7
 #include <glusterfs/syncop-utils.h>
b7d4d7
 #include <glusterfs/events.h>
b7d4d7
 
b7d4d7
-static int
b7d4d7
-afr_selfheal_entry_delete(xlator_t *this, inode_t *dir, const char *name,
b7d4d7
-                          inode_t *inode, int child, struct afr_reply *replies)
b7d4d7
+int
b7d4d7
+afr_selfheal_entry_anon_inode(xlator_t *this, inode_t *dir, const char *name,
b7d4d7
+                              inode_t *inode, int child,
b7d4d7
+                              struct afr_reply *replies,
b7d4d7
+                              gf_boolean_t *anon_inode)
b7d4d7
 {
b7d4d7
     afr_private_t *priv = NULL;
b7d4d7
+    afr_local_t *local = NULL;
b7d4d7
     xlator_t *subvol = NULL;
b7d4d7
     int ret = 0;
b7d4d7
+    int i = 0;
b7d4d7
+    char g[64] = {0};
b7d4d7
+    unsigned char *lookup_success = NULL;
b7d4d7
+    call_frame_t *frame = NULL;
b7d4d7
+    loc_t loc2 = {
b7d4d7
+        0,
b7d4d7
+    };
b7d4d7
     loc_t loc = {
b7d4d7
         0,
b7d4d7
     };
b7d4d7
-    char g[64];
b7d4d7
 
b7d4d7
     priv = this->private;
b7d4d7
-
b7d4d7
     subvol = priv->children[child];
b7d4d7
+    lookup_success = alloca0(priv->child_count);
b7d4d7
+    uuid_utoa_r(replies[child].poststat.ia_gfid, g);
b7d4d7
+    loc.inode = inode_new(inode->table);
b7d4d7
+    if (!loc.inode) {
b7d4d7
+        ret = -ENOMEM;
b7d4d7
+        goto out;
b7d4d7
+    }
b7d4d7
+
b7d4d7
+    if (replies[child].poststat.ia_type == IA_IFDIR) {
b7d4d7
+        /* This directory may have sub-directory hierarchy which may need to
b7d4d7
+         * be preserved for subsequent heals. So unconditionally move the
b7d4d7
+         * directory to anonymous-inode directory*/
b7d4d7
+        *anon_inode = _gf_true;
b7d4d7
+        goto anon_inode;
b7d4d7
+    }
b7d4d7
+
b7d4d7
+    frame = afr_frame_create(this, &ret);
b7d4d7
+    if (!frame) {
b7d4d7
+        ret = -ret;
b7d4d7
+        goto out;
b7d4d7
+    }
b7d4d7
+    local = frame->local;
b7d4d7
+    gf_uuid_copy(loc.gfid, replies[child].poststat.ia_gfid);
b7d4d7
+    AFR_ONLIST(local->child_up, frame, afr_selfheal_discover_cbk, lookup, &loc,
b7d4d7
+               NULL);
b7d4d7
+    for (i = 0; i < priv->child_count; i++) {
b7d4d7
+        if (local->replies[i].op_ret == 0) {
b7d4d7
+            lookup_success[i] = 1;
b7d4d7
+        } else if (local->replies[i].op_errno != ENOENT &&
b7d4d7
+                   local->replies[i].op_errno != ESTALE) {
b7d4d7
+            ret = -local->replies[i].op_errno;
b7d4d7
+        }
b7d4d7
+    }
b7d4d7
+
b7d4d7
+    if (priv->quorum_count) {
b7d4d7
+        if (afr_has_quorum(lookup_success, this, NULL)) {
b7d4d7
+            *anon_inode = _gf_true;
b7d4d7
+        }
b7d4d7
+    } else if (AFR_COUNT(lookup_success, priv->child_count) > 1) {
b7d4d7
+        *anon_inode = _gf_true;
b7d4d7
+    } else if (ret) {
b7d4d7
+        goto out;
b7d4d7
+    }
b7d4d7
+
b7d4d7
+anon_inode:
b7d4d7
+    if (!*anon_inode) {
b7d4d7
+        ret = 0;
b7d4d7
+        goto out;
b7d4d7
+    }
b7d4d7
 
b7d4d7
     loc.parent = inode_ref(dir);
b7d4d7
     gf_uuid_copy(loc.pargfid, dir->gfid);
b7d4d7
     loc.name = name;
b7d4d7
-    loc.inode = inode_ref(inode);
b7d4d7
 
b7d4d7
-    if (replies[child].valid && replies[child].op_ret == 0) {
b7d4d7
-        switch (replies[child].poststat.ia_type) {
b7d4d7
-            case IA_IFDIR:
b7d4d7
-                gf_msg(this->name, GF_LOG_WARNING, 0,
b7d4d7
-                       AFR_MSG_EXPUNGING_FILE_OR_DIR,
b7d4d7
-                       "expunging dir %s/%s (%s) on %s", uuid_utoa(dir->gfid),
b7d4d7
-                       name, uuid_utoa_r(replies[child].poststat.ia_gfid, g),
b7d4d7
-                       subvol->name);
b7d4d7
-                ret = syncop_rmdir(subvol, &loc, 1, NULL, NULL);
b7d4d7
-                break;
b7d4d7
-            default:
b7d4d7
-                gf_msg(this->name, GF_LOG_WARNING, 0,
b7d4d7
-                       AFR_MSG_EXPUNGING_FILE_OR_DIR,
b7d4d7
-                       "expunging file %s/%s (%s) on %s", uuid_utoa(dir->gfid),
b7d4d7
-                       name, uuid_utoa_r(replies[child].poststat.ia_gfid, g),
b7d4d7
-                       subvol->name);
b7d4d7
-                ret = syncop_unlink(subvol, &loc, NULL, NULL);
b7d4d7
-                break;
b7d4d7
-        }
b7d4d7
+    ret = afr_anon_inode_create(this, child, &loc2.parent);
b7d4d7
+    if (ret < 0)
b7d4d7
+        goto out;
b7d4d7
+
b7d4d7
+    loc2.name = g;
b7d4d7
+    ret = syncop_rename(subvol, &loc, &loc2, NULL, NULL);
b7d4d7
+    if (ret < 0) {
b7d4d7
+        gf_msg(this->name, GF_LOG_WARNING, -ret, AFR_MSG_EXPUNGING_FILE_OR_DIR,
b7d4d7
+               "Rename to %s dir %s/%s (%s) on %s failed",
b7d4d7
+               priv->anon_inode_name, uuid_utoa(dir->gfid), name, g,
b7d4d7
+               subvol->name);
b7d4d7
+    } else {
b7d4d7
+        gf_msg(this->name, GF_LOG_WARNING, 0, AFR_MSG_EXPUNGING_FILE_OR_DIR,
b7d4d7
+               "Rename to %s dir %s/%s (%s) on %s successful",
b7d4d7
+               priv->anon_inode_name, uuid_utoa(dir->gfid), name, g,
b7d4d7
+               subvol->name);
b7d4d7
     }
b7d4d7
 
b7d4d7
+out:
b7d4d7
     loc_wipe(&loc);
b7d4d7
+    loc_wipe(&loc2);
b7d4d7
+    if (frame) {
b7d4d7
+        AFR_STACK_DESTROY(frame);
b7d4d7
+    }
b7d4d7
 
b7d4d7
     return ret;
b7d4d7
 }
b7d4d7
 
b7d4d7
 int
b7d4d7
+afr_selfheal_entry_delete(xlator_t *this, inode_t *dir, const char *name,
b7d4d7
+                          inode_t *inode, int child, struct afr_reply *replies)
b7d4d7
+{
b7d4d7
+    char g[64] = {0};
b7d4d7
+    afr_private_t *priv = NULL;
b7d4d7
+    xlator_t *subvol = NULL;
b7d4d7
+    int ret = 0;
b7d4d7
+    loc_t loc = {
b7d4d7
+        0,
b7d4d7
+    };
b7d4d7
+    gf_boolean_t anon_inode = _gf_false;
b7d4d7
+
b7d4d7
+    priv = this->private;
b7d4d7
+    subvol = priv->children[child];
b7d4d7
+
b7d4d7
+    if ((!replies[child].valid) || (replies[child].op_ret < 0)) {
b7d4d7
+        /*Nothing to do*/
b7d4d7
+        ret = 0;
b7d4d7
+        goto out;
b7d4d7
+    }
b7d4d7
+
b7d4d7
+    if (priv->use_anon_inode) {
b7d4d7
+        ret = afr_selfheal_entry_anon_inode(this, dir, name, inode, child,
b7d4d7
+                                            replies, &anon_inode);
b7d4d7
+        if (ret < 0 || anon_inode)
b7d4d7
+            goto out;
b7d4d7
+    }
b7d4d7
+
b7d4d7
+    loc.parent = inode_ref(dir);
b7d4d7
+    loc.inode = inode_new(inode->table);
b7d4d7
+    if (!loc.inode) {
b7d4d7
+        ret = -ENOMEM;
b7d4d7
+        goto out;
b7d4d7
+    }
b7d4d7
+    loc.name = name;
b7d4d7
+    switch (replies[child].poststat.ia_type) {
b7d4d7
+        case IA_IFDIR:
b7d4d7
+            gf_msg(this->name, GF_LOG_WARNING, 0, AFR_MSG_EXPUNGING_FILE_OR_DIR,
b7d4d7
+                   "expunging dir %s/%s (%s) on %s", uuid_utoa(dir->gfid), name,
b7d4d7
+                   uuid_utoa_r(replies[child].poststat.ia_gfid, g),
b7d4d7
+                   subvol->name);
b7d4d7
+            ret = syncop_rmdir(subvol, &loc, 1, NULL, NULL);
b7d4d7
+            break;
b7d4d7
+        default:
b7d4d7
+            gf_msg(this->name, GF_LOG_WARNING, 0, AFR_MSG_EXPUNGING_FILE_OR_DIR,
b7d4d7
+                   "expunging file %s/%s (%s) on %s", uuid_utoa(dir->gfid),
b7d4d7
+                   name, uuid_utoa_r(replies[child].poststat.ia_gfid, g),
b7d4d7
+                   subvol->name);
b7d4d7
+            ret = syncop_unlink(subvol, &loc, NULL, NULL);
b7d4d7
+            break;
b7d4d7
+    }
b7d4d7
+
b7d4d7
+out:
b7d4d7
+    loc_wipe(&loc);
b7d4d7
+    return ret;
b7d4d7
+}
b7d4d7
+
b7d4d7
+int
b7d4d7
 afr_selfheal_recreate_entry(call_frame_t *frame, int dst, int source,
b7d4d7
                             unsigned char *sources, inode_t *dir,
b7d4d7
                             const char *name, inode_t *inode,
b7d4d7
@@ -76,6 +192,9 @@ afr_selfheal_recreate_entry(call_frame_t *frame, int dst, int source,
b7d4d7
     loc_t srcloc = {
b7d4d7
         0,
b7d4d7
     };
b7d4d7
+    loc_t anonloc = {
b7d4d7
+        0,
b7d4d7
+    };
b7d4d7
     xlator_t *this = frame->this;
b7d4d7
     afr_private_t *priv = NULL;
b7d4d7
     dict_t *xdata = NULL;
b7d4d7
@@ -86,15 +205,18 @@ afr_selfheal_recreate_entry(call_frame_t *frame, int dst, int source,
b7d4d7
         0,
b7d4d7
     };
b7d4d7
     unsigned char *newentry = NULL;
b7d4d7
+    char iatt_uuid_str[64] = {0};
b7d4d7
+    char dir_uuid_str[64] = {0};
b7d4d7
 
b7d4d7
     priv = this->private;
b7d4d7
     iatt = &replies[source].poststat;
b7d4d7
+    uuid_utoa_r(iatt->ia_gfid, iatt_uuid_str);
b7d4d7
     if (iatt->ia_type == IA_INVAL || gf_uuid_is_null(iatt->ia_gfid)) {
b7d4d7
         gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_SELF_HEAL_FAILED,
b7d4d7
                "Invalid ia_type (%d) or gfid(%s). source brick=%d, "
b7d4d7
                "pargfid=%s, name=%s",
b7d4d7
-               iatt->ia_type, uuid_utoa(iatt->ia_gfid), source,
b7d4d7
-               uuid_utoa(dir->gfid), name);
b7d4d7
+               iatt->ia_type, iatt_uuid_str, source,
b7d4d7
+               uuid_utoa_r(dir->gfid, dir_uuid_str), name);
b7d4d7
         ret = -EINVAL;
b7d4d7
         goto out;
b7d4d7
     }
b7d4d7
@@ -119,14 +241,24 @@ afr_selfheal_recreate_entry(call_frame_t *frame, int dst, int source,
b7d4d7
 
b7d4d7
     srcloc.inode = inode_ref(inode);
b7d4d7
     gf_uuid_copy(srcloc.gfid, iatt->ia_gfid);
b7d4d7
-    if (iatt->ia_type != IA_IFDIR)
b7d4d7
-        ret = syncop_lookup(priv->children[dst], &srcloc, 0, 0, 0, 0);
b7d4d7
-    if (iatt->ia_type == IA_IFDIR || ret == -ENOENT || ret == -ESTALE) {
b7d4d7
+    ret = syncop_lookup(priv->children[dst], &srcloc, 0, 0, 0, 0);
b7d4d7
+    if (ret == -ENOENT || ret == -ESTALE) {
b7d4d7
         newentry[dst] = 1;
b7d4d7
         ret = afr_selfheal_newentry_mark(frame, this, inode, source, replies,
b7d4d7
                                          sources, newentry);
b7d4d7
         if (ret)
b7d4d7
             goto out;
b7d4d7
+    } else if (ret == 0 && iatt->ia_type == IA_IFDIR && priv->use_anon_inode) {
b7d4d7
+        // Try rename from hidden directory
b7d4d7
+        ret = afr_anon_inode_create(this, dst, &anonloc.parent);
b7d4d7
+        if (ret < 0)
b7d4d7
+            goto out;
b7d4d7
+        anonloc.inode = inode_ref(inode);
b7d4d7
+        anonloc.name = iatt_uuid_str;
b7d4d7
+        ret = syncop_rename(priv->children[dst], &anonloc, &loc, NULL, NULL);
b7d4d7
+        if (ret == -ENOENT || ret == -ESTALE)
b7d4d7
+            ret = -1; /*This sets 'mismatch' to true*/
b7d4d7
+        goto out;
b7d4d7
     }
b7d4d7
 
b7d4d7
     mode = st_mode_from_ia(iatt->ia_prot, iatt->ia_type);
b7d4d7
@@ -165,6 +297,7 @@ out:
b7d4d7
     GF_FREE(linkname);
b7d4d7
     loc_wipe(&loc);
b7d4d7
     loc_wipe(&srcloc);
b7d4d7
+    loc_wipe(&anonloc);
b7d4d7
     return ret;
b7d4d7
 }
b7d4d7
 
b7d4d7
@@ -580,6 +713,11 @@ afr_selfheal_entry_dirent(call_frame_t *frame, xlator_t *this, fd_t *fd,
b7d4d7
 
b7d4d7
     priv = this->private;
b7d4d7
 
b7d4d7
+    if (afr_is_private_directory(priv, fd->inode->gfid, name,
b7d4d7
+                                 GF_CLIENT_PID_SELF_HEALD)) {
b7d4d7
+        return 0;
b7d4d7
+    }
b7d4d7
+
b7d4d7
     xattr = dict_new();
b7d4d7
     if (!xattr)
b7d4d7
         return -ENOMEM;
b7d4d7
@@ -628,7 +766,7 @@ afr_selfheal_entry_dirent(call_frame_t *frame, xlator_t *this, fd_t *fd,
b7d4d7
                                           replies);
b7d4d7
 
b7d4d7
         if ((ret == 0) && (priv->esh_granular) && parent_idx_inode) {
b7d4d7
-            ret = afr_shd_index_purge(subvol, parent_idx_inode, name,
b7d4d7
+            ret = afr_shd_entry_purge(subvol, parent_idx_inode, name,
b7d4d7
                                       inode->ia_type);
b7d4d7
             /* Why is ret force-set to 0? We do not care about
b7d4d7
              * index purge failing for full heal as it is quite
b7d4d7
@@ -758,10 +896,6 @@ afr_selfheal_entry_do_subvol(call_frame_t *frame, xlator_t *this, fd_t *fd,
b7d4d7
             if (!strcmp(entry->d_name, ".") || !strcmp(entry->d_name, ".."))
b7d4d7
                 continue;
b7d4d7
 
b7d4d7
-            if (__is_root_gfid(fd->inode->gfid) &&
b7d4d7
-                !strcmp(entry->d_name, GF_REPLICATE_TRASH_DIR))
b7d4d7
-                continue;
b7d4d7
-
b7d4d7
             ret = afr_selfheal_entry_dirent(iter_frame, this, fd, entry->d_name,
b7d4d7
                                             loc.inode, subvol,
b7d4d7
                                             local->need_full_crawl);
b7d4d7
@@ -824,7 +958,7 @@ afr_selfheal_entry_granular_dirent(xlator_t *subvol, gf_dirent_t *entry,
b7d4d7
         /* The name indices under the pgfid index dir are guaranteed
b7d4d7
          * to be regular files. Hence the hardcoding.
b7d4d7
          */
b7d4d7
-        afr_shd_index_purge(subvol, parent->inode, entry->d_name, IA_IFREG);
b7d4d7
+        afr_shd_entry_purge(subvol, parent->inode, entry->d_name, IA_IFREG);
b7d4d7
         ret = 0;
b7d4d7
         goto out;
b7d4d7
     }
b7d4d7
diff --git a/xlators/cluster/afr/src/afr-self-heal-name.c b/xlators/cluster/afr/src/afr-self-heal-name.c
b7d4d7
index dace071..51e3d8c 100644
b7d4d7
--- a/xlators/cluster/afr/src/afr-self-heal-name.c
b7d4d7
+++ b/xlators/cluster/afr/src/afr-self-heal-name.c
b7d4d7
@@ -98,21 +98,12 @@ __afr_selfheal_name_expunge(xlator_t *this, inode_t *parent, uuid_t pargfid,
b7d4d7
                             const char *bname, inode_t *inode,
b7d4d7
                             struct afr_reply *replies)
b7d4d7
 {
b7d4d7
-    loc_t loc = {
b7d4d7
-        0,
b7d4d7
-    };
b7d4d7
     int i = 0;
b7d4d7
     afr_private_t *priv = NULL;
b7d4d7
-    char g[64];
b7d4d7
     int ret = 0;
b7d4d7
 
b7d4d7
     priv = this->private;
b7d4d7
 
b7d4d7
-    loc.parent = inode_ref(parent);
b7d4d7
-    gf_uuid_copy(loc.pargfid, pargfid);
b7d4d7
-    loc.name = bname;
b7d4d7
-    loc.inode = inode_ref(inode);
b7d4d7
-
b7d4d7
     for (i = 0; i < priv->child_count; i++) {
b7d4d7
         if (!replies[i].valid)
b7d4d7
             continue;
b7d4d7
@@ -120,30 +111,10 @@ __afr_selfheal_name_expunge(xlator_t *this, inode_t *parent, uuid_t pargfid,
b7d4d7
         if (replies[i].op_ret)
b7d4d7
             continue;
b7d4d7
 
b7d4d7
-        switch (replies[i].poststat.ia_type) {
b7d4d7
-            case IA_IFDIR:
b7d4d7
-                gf_msg(this->name, GF_LOG_WARNING, 0,
b7d4d7
-                       AFR_MSG_EXPUNGING_FILE_OR_DIR,
b7d4d7
-                       "expunging dir %s/%s (%s) on %s", uuid_utoa(pargfid),
b7d4d7
-                       bname, uuid_utoa_r(replies[i].poststat.ia_gfid, g),
b7d4d7
-                       priv->children[i]->name);
b7d4d7
-
b7d4d7
-                ret |= syncop_rmdir(priv->children[i], &loc, 1, NULL, NULL);
b7d4d7
-                break;
b7d4d7
-            default:
b7d4d7
-                gf_msg(this->name, GF_LOG_WARNING, 0,
b7d4d7
-                       AFR_MSG_EXPUNGING_FILE_OR_DIR,
b7d4d7
-                       "expunging file %s/%s (%s) on %s", uuid_utoa(pargfid),
b7d4d7
-                       bname, uuid_utoa_r(replies[i].poststat.ia_gfid, g),
b7d4d7
-                       priv->children[i]->name);
b7d4d7
-
b7d4d7
-                ret |= syncop_unlink(priv->children[i], &loc, NULL, NULL);
b7d4d7
-                break;
b7d4d7
-        }
b7d4d7
+        ret |= afr_selfheal_entry_delete(this, parent, bname, inode, i,
b7d4d7
+                                         replies);
b7d4d7
     }
b7d4d7
 
b7d4d7
-    loc_wipe(&loc);
b7d4d7
-
b7d4d7
     return ret;
b7d4d7
 }
b7d4d7
 
b7d4d7
diff --git a/xlators/cluster/afr/src/afr-self-heal.h b/xlators/cluster/afr/src/afr-self-heal.h
b7d4d7
index 8f6fb00..c8dc384 100644
b7d4d7
--- a/xlators/cluster/afr/src/afr-self-heal.h
b7d4d7
+++ b/xlators/cluster/afr/src/afr-self-heal.h
b7d4d7
@@ -370,4 +370,9 @@ gf_boolean_t
b7d4d7
 afr_is_file_empty_on_all_children(afr_private_t *priv,
b7d4d7
                                   struct afr_reply *replies);
b7d4d7
 
b7d4d7
+int
b7d4d7
+afr_selfheal_entry_delete(xlator_t *this, inode_t *dir, const char *name,
b7d4d7
+                          inode_t *inode, int child, struct afr_reply *replies);
b7d4d7
+int
b7d4d7
+afr_anon_inode_create(xlator_t *this, int child, inode_t **linked_inode);
b7d4d7
 #endif /* !_AFR_SELFHEAL_H */
b7d4d7
diff --git a/xlators/cluster/afr/src/afr-self-heald.c b/xlators/cluster/afr/src/afr-self-heald.c
b7d4d7
index 95ac5f2..939a135 100644
b7d4d7
--- a/xlators/cluster/afr/src/afr-self-heald.c
b7d4d7
+++ b/xlators/cluster/afr/src/afr-self-heald.c
b7d4d7
@@ -222,7 +222,7 @@ out:
b7d4d7
 }
b7d4d7
 
b7d4d7
 int
b7d4d7
-afr_shd_index_purge(xlator_t *subvol, inode_t *inode, char *name,
b7d4d7
+afr_shd_entry_purge(xlator_t *subvol, inode_t *inode, char *name,
b7d4d7
                     ia_type_t type)
b7d4d7
 {
b7d4d7
     int ret = 0;
b7d4d7
@@ -422,7 +422,7 @@ afr_shd_index_heal(xlator_t *subvol, gf_dirent_t *entry, loc_t *parent,
b7d4d7
     ret = afr_shd_selfheal(healer, healer->subvol, gfid);
b7d4d7
 
b7d4d7
     if (ret == -ENOENT || ret == -ESTALE)
b7d4d7
-        afr_shd_index_purge(subvol, parent->inode, entry->d_name, val);
b7d4d7
+        afr_shd_entry_purge(subvol, parent->inode, entry->d_name, val);
b7d4d7
 
b7d4d7
     if (ret == 2)
b7d4d7
         /* If bricks crashed in pre-op after creating indices/xattrop
b7d4d7
@@ -798,6 +798,176 @@ afr_bricks_available_for_heal(afr_private_t *priv)
b7d4d7
     return _gf_true;
b7d4d7
 }
b7d4d7
 
b7d4d7
+static int
b7d4d7
+afr_shd_anon_inode_cleaner(xlator_t *subvol, gf_dirent_t *entry, loc_t *parent,
b7d4d7
+                           void *data)
b7d4d7
+{
b7d4d7
+    struct subvol_healer *healer = data;
b7d4d7
+    afr_private_t *priv = healer->this->private;
b7d4d7
+    call_frame_t *frame = NULL;
b7d4d7
+    afr_local_t *local = NULL;
b7d4d7
+    int ret = 0;
b7d4d7
+    loc_t loc = {0};
b7d4d7
+    int count = 0;
b7d4d7
+    int i = 0;
b7d4d7
+    int op_errno = 0;
b7d4d7
+    struct iatt *iatt = NULL;
b7d4d7
+    gf_boolean_t multiple_links = _gf_false;
b7d4d7
+    unsigned char *gfid_present = alloca0(priv->child_count);
b7d4d7
+    unsigned char *entry_present = alloca0(priv->child_count);
b7d4d7
+    char *type = "file";
b7d4d7
+
b7d4d7
+    frame = afr_frame_create(healer->this, &ret);
b7d4d7
+    if (!frame) {
b7d4d7
+        ret = -ret;
b7d4d7
+        goto out;
b7d4d7
+    }
b7d4d7
+    local = frame->local;
b7d4d7
+    if (AFR_COUNT(local->child_up, priv->child_count) != priv->child_count) {
b7d4d7
+        gf_msg_debug(healer->this->name, 0,
b7d4d7
+                     "Not all bricks are up. Skipping "
b7d4d7
+                     "cleanup of %s on %s",
b7d4d7
+                     entry->d_name, subvol->name);
b7d4d7
+        ret = 0;
b7d4d7
+        goto out;
b7d4d7
+    }
b7d4d7
+
b7d4d7
+    loc.inode = inode_new(parent->inode->table);
b7d4d7
+    if (!loc.inode) {
b7d4d7
+        ret = -ENOMEM;
b7d4d7
+        goto out;
b7d4d7
+    }
b7d4d7
+    ret = gf_uuid_parse(entry->d_name, loc.gfid);
b7d4d7
+    if (ret) {
b7d4d7
+        ret = 0;
b7d4d7
+        goto out;
b7d4d7
+    }
b7d4d7
+    AFR_ONLIST(local->child_up, frame, afr_selfheal_discover_cbk, lookup, &loc,
b7d4d7
+               NULL);
b7d4d7
+    for (i = 0; i < priv->child_count; i++) {
b7d4d7
+        if (local->replies[i].op_ret == 0) {
b7d4d7
+            count++;
b7d4d7
+            gfid_present[i] = 1;
b7d4d7
+            iatt = &local->replies[i].poststat;
b7d4d7
+            if (iatt->ia_type == IA_IFDIR) {
b7d4d7
+                type = "dir";
b7d4d7
+            }
b7d4d7
+
b7d4d7
+            if (i == healer->subvol) {
b7d4d7
+                if (local->replies[i].poststat.ia_nlink > 1) {
b7d4d7
+                    multiple_links = _gf_true;
b7d4d7
+                }
b7d4d7
+            }
b7d4d7
+        } else if (local->replies[i].op_errno != ENOENT &&
b7d4d7
+                   local->replies[i].op_errno != ESTALE) {
b7d4d7
+            /*We don't have complete view. Skip the entry*/
b7d4d7
+            gf_msg_debug(healer->this->name, local->replies[i].op_errno,
b7d4d7
+                         "Skipping cleanup of %s on %s", entry->d_name,
b7d4d7
+                         subvol->name);
b7d4d7
+            ret = 0;
b7d4d7
+            goto out;
b7d4d7
+        }
b7d4d7
+    }
b7d4d7
+
b7d4d7
+    /*Inode is deleted from subvol*/
b7d4d7
+    if (count == 1 || (iatt->ia_type != IA_IFDIR && multiple_links)) {
b7d4d7
+        gf_msg(healer->this->name, GF_LOG_WARNING, 0,
b7d4d7
+               AFR_MSG_EXPUNGING_FILE_OR_DIR, "expunging %s %s/%s on %s", type,
b7d4d7
+               priv->anon_inode_name, entry->d_name, subvol->name);
b7d4d7
+        ret = afr_shd_entry_purge(subvol, parent->inode, entry->d_name,
b7d4d7
+                                  iatt->ia_type);
b7d4d7
+        if (ret == -ENOENT || ret == -ESTALE)
b7d4d7
+            ret = 0;
b7d4d7
+    } else if (count > 1) {
b7d4d7
+        loc_wipe(&loc);
b7d4d7
+        loc.parent = inode_ref(parent->inode);
b7d4d7
+        loc.name = entry->d_name;
b7d4d7
+        loc.inode = inode_new(parent->inode->table);
b7d4d7
+        if (!loc.inode) {
b7d4d7
+            ret = -ENOMEM;
b7d4d7
+            goto out;
b7d4d7
+        }
b7d4d7
+        AFR_ONLIST(local->child_up, frame, afr_selfheal_discover_cbk, lookup,
b7d4d7
+                   &loc, NULL);
b7d4d7
+        count = 0;
b7d4d7
+        for (i = 0; i < priv->child_count; i++) {
b7d4d7
+            if (local->replies[i].op_ret == 0) {
b7d4d7
+                count++;
b7d4d7
+                entry_present[i] = 1;
b7d4d7
+                iatt = &local->replies[i].poststat;
b7d4d7
+            } else if (local->replies[i].op_errno != ENOENT &&
b7d4d7
+                       local->replies[i].op_errno != ESTALE) {
b7d4d7
+                /*We don't have complete view. Skip the entry*/
b7d4d7
+                gf_msg_debug(healer->this->name, local->replies[i].op_errno,
b7d4d7
+                             "Skipping cleanup of %s on %s", entry->d_name,
b7d4d7
+                             subvol->name);
b7d4d7
+                ret = 0;
b7d4d7
+                goto out;
b7d4d7
+            }
b7d4d7
+        }
b7d4d7
+        for (i = 0; i < priv->child_count; i++) {
b7d4d7
+            if (gfid_present[i] && !entry_present[i]) {
b7d4d7
+                /*Entry is not anonymous on at least one subvol*/
b7d4d7
+                gf_msg_debug(healer->this->name, 0,
b7d4d7
+                             "Valid entry present on %s "
b7d4d7
+                             "Skipping cleanup of %s on %s",
b7d4d7
+                             priv->children[i]->name, entry->d_name,
b7d4d7
+                             subvol->name);
b7d4d7
+                ret = 0;
b7d4d7
+                goto out;
b7d4d7
+            }
b7d4d7
+        }
b7d4d7
+
b7d4d7
+        gf_msg(healer->this->name, GF_LOG_WARNING, 0,
b7d4d7
+               AFR_MSG_EXPUNGING_FILE_OR_DIR,
b7d4d7
+               "expunging %s %s/%s on all subvols", type, priv->anon_inode_name,
b7d4d7
+               entry->d_name);
b7d4d7
+        ret = 0;
b7d4d7
+        for (i = 0; i < priv->child_count; i++) {
b7d4d7
+            op_errno = -afr_shd_entry_purge(priv->children[i], loc.parent,
b7d4d7
+                                            entry->d_name, iatt->ia_type);
b7d4d7
+            if (op_errno != ENOENT && op_errno != ESTALE) {
b7d4d7
+                ret |= -op_errno;
b7d4d7
+            }
b7d4d7
+        }
b7d4d7
+    }
b7d4d7
+
b7d4d7
+out:
b7d4d7
+    if (frame)
b7d4d7
+        AFR_STACK_DESTROY(frame);
b7d4d7
+    loc_wipe(&loc);
b7d4d7
+    return ret;
b7d4d7
+}
b7d4d7
+
b7d4d7
+static void
b7d4d7
+afr_cleanup_anon_inode_dir(struct subvol_healer *healer)
b7d4d7
+{
b7d4d7
+    int ret = 0;
b7d4d7
+    call_frame_t *frame = NULL;
b7d4d7
+    afr_private_t *priv = healer->this->private;
b7d4d7
+    loc_t loc = {0};
b7d4d7
+
b7d4d7
+    ret = afr_anon_inode_create(healer->this, healer->subvol, &loc.inode);
b7d4d7
+    if (ret)
b7d4d7
+        goto out;
b7d4d7
+
b7d4d7
+    frame = afr_frame_create(healer->this, &ret);
b7d4d7
+    if (!frame) {
b7d4d7
+        ret = -ret;
b7d4d7
+        goto out;
b7d4d7
+    }
b7d4d7
+
b7d4d7
+    ret = syncop_mt_dir_scan(frame, priv->children[healer->subvol], &loc,
b7d4d7
+                             GF_CLIENT_PID_SELF_HEALD, healer,
b7d4d7
+                             afr_shd_anon_inode_cleaner, NULL,
b7d4d7
+                             priv->shd.max_threads, priv->shd.wait_qlength);
b7d4d7
+out:
b7d4d7
+    if (frame)
b7d4d7
+        AFR_STACK_DESTROY(frame);
b7d4d7
+    loc_wipe(&loc);
b7d4d7
+    return;
b7d4d7
+}
b7d4d7
+
b7d4d7
 void *
b7d4d7
 afr_shd_index_healer(void *data)
b7d4d7
 {
b7d4d7
@@ -854,6 +1024,10 @@ afr_shd_index_healer(void *data)
b7d4d7
             sleep(1);
b7d4d7
         } while (ret > 0);
b7d4d7
 
b7d4d7
+        if (ret == 0) {
b7d4d7
+            afr_cleanup_anon_inode_dir(healer);
b7d4d7
+        }
b7d4d7
+
b7d4d7
         if (pre_crawl_xdata && !healer->crawl_event.heal_failed_count) {
b7d4d7
             afr_shd_ta_check_and_unset_xattrs(this, &loc, healer,
b7d4d7
                                               pre_crawl_xdata);
b7d4d7
diff --git a/xlators/cluster/afr/src/afr-self-heald.h b/xlators/cluster/afr/src/afr-self-heald.h
b7d4d7
index 1990539..acd567e 100644
b7d4d7
--- a/xlators/cluster/afr/src/afr-self-heald.h
b7d4d7
+++ b/xlators/cluster/afr/src/afr-self-heald.h
b7d4d7
@@ -70,6 +70,6 @@ afr_shd_gfid_to_path(xlator_t *this, xlator_t *subvol, uuid_t gfid,
b7d4d7
                      char **path_p);
b7d4d7
 
b7d4d7
 int
b7d4d7
-afr_shd_index_purge(xlator_t *subvol, inode_t *inode, char *name,
b7d4d7
+afr_shd_entry_purge(xlator_t *subvol, inode_t *inode, char *name,
b7d4d7
                     ia_type_t type);
b7d4d7
 #endif /* !_AFR_SELF_HEALD_H */
b7d4d7
diff --git a/xlators/cluster/afr/src/afr.c b/xlators/cluster/afr/src/afr.c
b7d4d7
index bfa464f..33fe4d8 100644
b7d4d7
--- a/xlators/cluster/afr/src/afr.c
b7d4d7
+++ b/xlators/cluster/afr/src/afr.c
b7d4d7
@@ -135,6 +135,27 @@ set_data_self_heal_algorithm(afr_private_t *priv, char *algo)
b7d4d7
     }
b7d4d7
 }
b7d4d7
 
b7d4d7
+void
b7d4d7
+afr_handle_anon_inode_options(afr_private_t *priv, dict_t *options)
b7d4d7
+{
b7d4d7
+    char *volfile_id_str = NULL;
b7d4d7
+    uuid_t anon_inode_gfid = {0};
b7d4d7
+
b7d4d7
+    /*If volume id is not present don't enable anything*/
b7d4d7
+    if (dict_get_str(options, "volume-id", &volfile_id_str))
b7d4d7
+        return;
b7d4d7
+    GF_ASSERT(strlen(AFR_ANON_DIR_PREFIX) + strlen(volfile_id_str) <= NAME_MAX);
b7d4d7
+    /*anon_inode_name is not supposed to change once assigned*/
b7d4d7
+    if (!priv->anon_inode_name[0]) {
b7d4d7
+        snprintf(priv->anon_inode_name, sizeof(priv->anon_inode_name), "%s-%s",
b7d4d7
+                 AFR_ANON_DIR_PREFIX, volfile_id_str);
b7d4d7
+        gf_uuid_parse(volfile_id_str, anon_inode_gfid);
b7d4d7
+        /*Flip a bit to make sure volfile-id and anon-gfid are not same*/
b7d4d7
+        anon_inode_gfid[0] ^= 1;
b7d4d7
+        uuid_utoa_r(anon_inode_gfid, priv->anon_gfid_str);
b7d4d7
+    }
b7d4d7
+}
b7d4d7
+
b7d4d7
 int
b7d4d7
 reconfigure(xlator_t *this, dict_t *options)
b7d4d7
 {
b7d4d7
@@ -287,6 +308,10 @@ reconfigure(xlator_t *this, dict_t *options)
b7d4d7
         consistent_io = _gf_false;
b7d4d7
     priv->consistent_io = consistent_io;
b7d4d7
 
b7d4d7
+    afr_handle_anon_inode_options(priv, options);
b7d4d7
+
b7d4d7
+    GF_OPTION_RECONF("use-anonymous-inode", priv->use_anon_inode, options, bool,
b7d4d7
+                     out);
b7d4d7
     if (priv->shd.enabled) {
b7d4d7
         if ((priv->shd.enabled != enabled_old) ||
b7d4d7
             (timeout_old != priv->shd.timeout))
b7d4d7
@@ -535,7 +560,9 @@ init(xlator_t *this)
b7d4d7
 
b7d4d7
     GF_OPTION_INIT("consistent-metadata", priv->consistent_metadata, bool, out);
b7d4d7
     GF_OPTION_INIT("consistent-io", priv->consistent_io, bool, out);
b7d4d7
+    afr_handle_anon_inode_options(priv, this->options);
b7d4d7
 
b7d4d7
+    GF_OPTION_INIT("use-anonymous-inode", priv->use_anon_inode, bool, out);
b7d4d7
     if (priv->quorum_count != 0)
b7d4d7
         priv->consistent_io = _gf_false;
b7d4d7
 
b7d4d7
@@ -547,13 +574,16 @@ init(xlator_t *this)
b7d4d7
         goto out;
b7d4d7
     }
b7d4d7
 
b7d4d7
+    priv->anon_inode = GF_CALLOC(sizeof(unsigned char), child_count,
b7d4d7
+                                 gf_afr_mt_char);
b7d4d7
+
b7d4d7
     priv->child_up = GF_CALLOC(sizeof(unsigned char), child_count,
b7d4d7
                                gf_afr_mt_char);
b7d4d7
 
b7d4d7
     priv->child_latency = GF_MALLOC(sizeof(*priv->child_latency) * child_count,
b7d4d7
                                     gf_afr_mt_child_latency_t);
b7d4d7
 
b7d4d7
-    if (!priv->child_up || !priv->child_latency) {
b7d4d7
+    if (!priv->child_up || !priv->child_latency || !priv->anon_inode) {
b7d4d7
         ret = -ENOMEM;
b7d4d7
         goto out;
b7d4d7
     }
b7d4d7
@@ -1218,6 +1248,14 @@ struct volume_options options[] = {
b7d4d7
      .tags = {"replicate"},
b7d4d7
      .description = "This option exists only for backward compatibility "
b7d4d7
                     "and configuring it doesn't have any effect"},
b7d4d7
+    {.key = {"use-anonymous-inode"},
b7d4d7
+     .type = GF_OPTION_TYPE_BOOL,
b7d4d7
+     .default_value = "no",
b7d4d7
+     .op_version = {GD_OP_VERSION_7_0},
b7d4d7
+     .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE,
b7d4d7
+     .tags = {"replicate"},
b7d4d7
+     .description = "Setting this option heals directory renames efficiently"},
b7d4d7
+
b7d4d7
     {.key = {NULL}},
b7d4d7
 };
b7d4d7
 
b7d4d7
diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h
b7d4d7
index 3a2b26d..6a9a763 100644
b7d4d7
--- a/xlators/cluster/afr/src/afr.h
b7d4d7
+++ b/xlators/cluster/afr/src/afr.h
b7d4d7
@@ -40,6 +40,8 @@
b7d4d7
 #define AFR_TA_DOM_MODIFY "afr.ta.dom-modify"
b7d4d7
 
b7d4d7
 #define AFR_HALO_MAX_LATENCY 99999
b7d4d7
+#define AFR_ANON_DIR_PREFIX ".glusterfs-anonymous-inode"
b7d4d7
+
b7d4d7
 
b7d4d7
 #define PFLAG_PENDING (1 << 0)
b7d4d7
 #define PFLAG_SBRAIN (1 << 1)
b7d4d7
@@ -155,6 +157,7 @@ typedef struct _afr_private {
b7d4d7
     struct list_head ta_waitq;
b7d4d7
     struct list_head ta_onwireq;
b7d4d7
 
b7d4d7
+    unsigned char *anon_inode;
b7d4d7
     unsigned char *child_up;
b7d4d7
     int64_t *child_latency;
b7d4d7
     unsigned char *local;
b7d4d7
@@ -240,6 +243,11 @@ typedef struct _afr_private {
b7d4d7
     gf_boolean_t esh_granular;
b7d4d7
     gf_boolean_t consistent_io;
b7d4d7
     gf_boolean_t data_self_heal; /* on/off */
b7d4d7
+    gf_boolean_t use_anon_inode;
b7d4d7
+
b7d4d7
+    /*For anon-inode handling */
b7d4d7
+    char anon_inode_name[NAME_MAX + 1];
b7d4d7
+    char anon_gfid_str[UUID_SIZE + 1];
b7d4d7
 } afr_private_t;
b7d4d7
 
b7d4d7
 typedef enum {
b7d4d7
@@ -1341,4 +1349,7 @@ afr_selfheal_childup(xlator_t *this, afr_private_t *priv);
b7d4d7
 void
b7d4d7
 afr_fill_success_replies(afr_local_t *local, afr_private_t *priv,
b7d4d7
                          unsigned char *replies);
b7d4d7
+gf_boolean_t
b7d4d7
+afr_is_private_directory(afr_private_t *priv, uuid_t pargfid, const char *name,
b7d4d7
+                         pid_t pid);
b7d4d7
 #endif /* __AFR_H__ */
b7d4d7
diff --git a/xlators/mgmt/glusterd/src/glusterd-volgen.c b/xlators/mgmt/glusterd/src/glusterd-volgen.c
b7d4d7
index 094a71f..1920284 100644
b7d4d7
--- a/xlators/mgmt/glusterd/src/glusterd-volgen.c
b7d4d7
+++ b/xlators/mgmt/glusterd/src/glusterd-volgen.c
b7d4d7
@@ -3867,6 +3867,38 @@ out:
b7d4d7
 }
b7d4d7
 
b7d4d7
 static int
b7d4d7
+set_volfile_id_option(volgen_graph_t *graph, glusterd_volinfo_t *volinfo,
b7d4d7
+                      int clusters)
b7d4d7
+{
b7d4d7
+    xlator_t *xlator = NULL;
b7d4d7
+    int i = 0;
b7d4d7
+    int ret = -1;
b7d4d7
+    glusterd_conf_t *conf = NULL;
b7d4d7
+    xlator_t *this = NULL;
b7d4d7
+
b7d4d7
+    this = THIS;
b7d4d7
+    GF_VALIDATE_OR_GOTO("glusterd", this, out);
b7d4d7
+    conf = this->private;
b7d4d7
+    GF_VALIDATE_OR_GOTO(this->name, conf, out);
b7d4d7
+
b7d4d7
+    if (conf->op_version < GD_OP_VERSION_7_1)
b7d4d7
+        return 0;
b7d4d7
+    xlator = first_of(graph);
b7d4d7
+
b7d4d7
+    for (i = 0; i < clusters; i++) {
b7d4d7
+        ret = xlator_set_fixed_option(xlator, "volume-id",
b7d4d7
+                                      uuid_utoa(volinfo->volume_id));
b7d4d7
+        if (ret)
b7d4d7
+            goto out;
b7d4d7
+
b7d4d7
+        xlator = xlator->next;
b7d4d7
+    }
b7d4d7
+
b7d4d7
+out:
b7d4d7
+    return ret;
b7d4d7
+}
b7d4d7
+
b7d4d7
+static int
b7d4d7
 volgen_graph_build_afr_clusters(volgen_graph_t *graph,
b7d4d7
                                 glusterd_volinfo_t *volinfo)
b7d4d7
 {
b7d4d7
@@ -3906,6 +3938,13 @@ volgen_graph_build_afr_clusters(volgen_graph_t *graph,
b7d4d7
         clusters = -1;
b7d4d7
         goto out;
b7d4d7
     }
b7d4d7
+
b7d4d7
+    ret = set_volfile_id_option(graph, volinfo, clusters);
b7d4d7
+    if (ret) {
b7d4d7
+        clusters = -1;
b7d4d7
+        goto out;
b7d4d7
+    }
b7d4d7
+
b7d4d7
     if (!volinfo->arbiter_count)
b7d4d7
         goto out;
b7d4d7
 
b7d4d7
diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
b7d4d7
index 62acadf..c1ca190 100644
b7d4d7
--- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c
b7d4d7
+++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
b7d4d7
@@ -3789,4 +3789,10 @@ struct volopt_map_entry glusterd_volopt_map[] = {
b7d4d7
      .voltype = "features/cloudsync",
b7d4d7
      .op_version = GD_OP_VERSION_7_0,
b7d4d7
      .flags = VOLOPT_FLAG_CLIENT_OPT},
b7d4d7
+
b7d4d7
+    {.key = "cluster.use-anonymous-inode",
b7d4d7
+     .voltype = "cluster/replicate",
b7d4d7
+     .op_version = GD_OP_VERSION_7_1,
b7d4d7
+     .value = "yes",
b7d4d7
+     .flags = VOLOPT_FLAG_CLIENT_OPT},
b7d4d7
     {.key = NULL}};
b7d4d7
-- 
b7d4d7
1.8.3.1
b7d4d7