74096c
From aab8a587360214432c4a2ab59134411f1d38c509 Mon Sep 17 00:00:00 2001
74096c
From: karthik-us <ksubrahm@redhat.com>
74096c
Date: Wed, 9 Dec 2020 10:46:31 +0530
74096c
Subject: [PATCH 515/517] cluster/afr: Heal directory rename without
74096c
 rmdir/mkdir
74096c
74096c
Problem1:
74096c
When a directory is renamed while a brick
74096c
is down, entry-heal always did an rm -rf on that directory on
74096c
the sink on old location and did mkdir and created the directory
74096c
hierarchy again in the new location. This is inefficient.
74096c
74096c
Problem2:
74096c
Renamedir heal order may lead to a scenario where the directory in
74096c
the new location could be created before deleting it from old
74096c
location, leading to 2 directories with the same gfid in posix.
74096c
74096c
Fix:
74096c
As part of heal, if old-location is healed first and is not present in
74096c
source-brick, always rename it into a hidden directory inside the
74096c
sink-brick so that when heal is triggered in new-location shd can
74096c
rename it from this hidden directory to the new-location.
74096c
74096c
If new-location heal is triggered first and it detects that the
74096c
directory already exists in the brick, then it should skip healing the
74096c
directory until it appears in the hidden directory.
74096c
74096c
Credits: Ravi for rename-data-loss.t script
74096c
74096c
Upstream patch details:
74096c
> Fixes: #1211
74096c
> Change-Id: I0cba2006f35cd03d314d18211ce0bd530e254843
74096c
> Signed-off-by: Pranith Kumar K <pkarampu@redhat.com>
74096c
Upstream patch: https://review.gluster.org/#/c/glusterfs/+/24373/
74096c
74096c
BUG: 1640148
74096c
Change-Id: I0cba2006f35cd03d314d18211ce0bd530e254843
74096c
Signed-off-by: karthik-us <ksubrahm@redhat.com>
74096c
Reviewed-on: https://code.engineering.redhat.com/gerrit/220660
74096c
Tested-by: RHGS Build Bot <nigelb@redhat.com>
74096c
Reviewed-by: Ravishankar Narayanankutty <ravishankar@redhat.com>
74096c
---
74096c
 tests/afr.rc                                    |  16 +
74096c
 tests/basic/afr/afr-anon-inode-no-quorum.t      |  63 ++++
74096c
 tests/basic/afr/afr-anon-inode.t                | 114 ++++++
74096c
 tests/basic/afr/entry-self-heal-anon-dir-off.t  | 464 ++++++++++++++++++++++++
74096c
 tests/basic/afr/rename-data-loss.t              |  72 ++++
74096c
 tests/bugs/replicate/bug-1744548-heal-timeout.t |   6 +-
74096c
 tests/features/trash.t                          |  74 ++--
74096c
 xlators/cluster/afr/src/afr-common.c            |  46 ++-
74096c
 xlators/cluster/afr/src/afr-dir-read.c          |  12 +-
74096c
 xlators/cluster/afr/src/afr-self-heal-common.c  | 182 ++++++++++
74096c
 xlators/cluster/afr/src/afr-self-heal-entry.c   | 206 +++++++++--
74096c
 xlators/cluster/afr/src/afr-self-heal-name.c    |  33 +-
74096c
 xlators/cluster/afr/src/afr-self-heal.h         |   5 +
74096c
 xlators/cluster/afr/src/afr-self-heald.c        | 178 ++++++++-
74096c
 xlators/cluster/afr/src/afr-self-heald.h        |   2 +-
74096c
 xlators/cluster/afr/src/afr.c                   |  40 +-
74096c
 xlators/cluster/afr/src/afr.h                   |  11 +
74096c
 xlators/mgmt/glusterd/src/glusterd-volgen.c     |  39 ++
74096c
 xlators/mgmt/glusterd/src/glusterd-volume-set.c |   6 +
74096c
 19 files changed, 1442 insertions(+), 127 deletions(-)
74096c
 create mode 100644 tests/basic/afr/afr-anon-inode-no-quorum.t
74096c
 create mode 100644 tests/basic/afr/afr-anon-inode.t
74096c
 create mode 100644 tests/basic/afr/entry-self-heal-anon-dir-off.t
74096c
 create mode 100644 tests/basic/afr/rename-data-loss.t
74096c
74096c
diff --git a/tests/afr.rc b/tests/afr.rc
74096c
index 35f352d..2417899 100644
74096c
--- a/tests/afr.rc
74096c
+++ b/tests/afr.rc
74096c
@@ -105,3 +105,19 @@ function get_quorum_type()
74096c
         local repl_id="$3"
74096c
         cat $m/.meta/graphs/active/$v-replicate-$repl_id/private|grep quorum-type|awk '{print $3}'
74096c
 }
74096c
+
74096c
+function afr_private_key_value()
74096c
+{
74096c
+        local v=$1
74096c
+        local m=$2
74096c
+        local replica_id=$3
74096c
+        local key=$4
74096c
+#xargs at the end will strip leading spaces
74096c
+        grep -E "^${key} = " $m/.meta/graphs/active/${v}-replicate-${replica_id}/private | cut -f2 -d'=' | xargs
74096c
+}
74096c
+
74096c
+function afr_anon_entry_count()
74096c
+{
74096c
+    local b=$1
74096c
+    ls $b/.glusterfs-anonymous-inode* | wc -l
74096c
+}
74096c
diff --git a/tests/basic/afr/afr-anon-inode-no-quorum.t b/tests/basic/afr/afr-anon-inode-no-quorum.t
74096c
new file mode 100644
74096c
index 0000000..896ba0c
74096c
--- /dev/null
74096c
+++ b/tests/basic/afr/afr-anon-inode-no-quorum.t
74096c
@@ -0,0 +1,63 @@
74096c
+#!/bin/bash
74096c
+
74096c
+#Test that anon-inode entry is not cleaned up as long as there exists at least
74096c
+#one valid entry
74096c
+. $(dirname $0)/../../include.rc
74096c
+. $(dirname $0)/../../volume.rc
74096c
+. $(dirname $0)/../../afr.rc
74096c
+
74096c
+cleanup;
74096c
+
74096c
+TEST glusterd
74096c
+TEST pidof glusterd
74096c
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
74096c
+TEST $CLI volume heal $V0 disable
74096c
+TEST $CLI volume set $V0 performance.write-behind off
74096c
+TEST $CLI volume set $V0 performance.read-ahead off
74096c
+TEST $CLI volume set $V0 performance.readdir-ahead off
74096c
+TEST $CLI volume set $V0 performance.open-behind off
74096c
+TEST $CLI volume set $V0 performance.stat-prefetch off
74096c
+TEST $CLI volume set $V0 performance.io-cache off
74096c
+TEST $CLI volume set $V0 performance.quick-read off
74096c
+TEST $CLI volume set $V0 cluster.entry-self-heal off
74096c
+TEST $CLI volume start $V0
74096c
+
74096c
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
74096c
+
74096c
+TEST touch $M0/a $M0/b
74096c
+
74096c
+gfid_a=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/${V0}0/a))
74096c
+gfid_b=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/${V0}0/b))
74096c
+TEST kill_brick $V0 $H0 $B0/${V0}0
74096c
+TEST mv $M0/a $M0/a-new
74096c
+TEST mv $M0/b $M0/b-new
74096c
+
74096c
+TEST $CLI volume start $V0 force
74096c
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
74096c
+TEST ! ls $M0/a
74096c
+TEST ! ls $M0/b
74096c
+anon_inode_name=$(ls -a $B0/${V0}0 | grep glusterfs-anonymous-inode)
74096c
+TEST stat $B0/${V0}0/$anon_inode_name/$gfid_a
74096c
+TEST stat $B0/${V0}0/$anon_inode_name/$gfid_b
74096c
+#Make sure index heal doesn't happen after enabling heal
74096c
+TEST setfattr -x trusted.afr.$V0-client-0 $B0/${V0}1
74096c
+TEST rm -f $B0/${V0}1/.glusterfs/indices/xattrop/*
74096c
+TEST $CLI volume heal $V0 enable
74096c
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
74096c
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
74096c
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
74096c
+TEST $CLI volume heal $V0
74096c
+#Allow time for a scan
74096c
+sleep 5
74096c
+TEST stat $B0/${V0}0/$anon_inode_name/$gfid_a
74096c
+TEST stat $B0/${V0}0/$anon_inode_name/$gfid_b
74096c
+inum_b=$(STAT_INO $B0/${V0}0/$anon_inode_name/$gfid_b)
74096c
+TEST rm -f $M0/a-new
74096c
+TEST stat $M0/b-new
74096c
+
74096c
+TEST $CLI volume heal $V0
74096c
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" afr_anon_entry_count $B0/${V0}0
74096c
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" afr_anon_entry_count $B0/${V0}1
74096c
+EXPECT "$inum_b" STAT_INO $B0/${V0}0/b-new
74096c
+
74096c
+cleanup
74096c
diff --git a/tests/basic/afr/afr-anon-inode.t b/tests/basic/afr/afr-anon-inode.t
74096c
new file mode 100644
74096c
index 0000000..f4cf37a
74096c
--- /dev/null
74096c
+++ b/tests/basic/afr/afr-anon-inode.t
74096c
@@ -0,0 +1,114 @@
74096c
+#!/bin/bash
74096c
+#Tests that afr-anon-inode test cases work fine as expected
74096c
+#These are cases where, in entry-heal/name-heal, we don't know the entry for an inode
74096c
+#so these inodes are kept in a special directory
74096c
+
74096c
+. $(dirname $0)/../../include.rc
74096c
+. $(dirname $0)/../../volume.rc
74096c
+. $(dirname $0)/../../afr.rc
74096c
+
74096c
+cleanup;
74096c
+
74096c
+TEST glusterd
74096c
+TEST pidof glusterd
74096c
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0..2}
74096c
+TEST $CLI volume set $V0 performance.quick-read off
74096c
+TEST $CLI volume set $V0 performance.io-cache off
74096c
+TEST $CLI volume set $V0 performance.write-behind off
74096c
+TEST $CLI volume set $V0 performance.stat-prefetch off
74096c
+TEST $CLI volume set $V0 performance.read-ahead off
74096c
+TEST $CLI volume set $V0 performance.open-behind off
74096c
+TEST $CLI volume start $V0
74096c
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0;
74096c
+EXPECT "^1$" afr_private_key_value $V0 $M0 0 "use-anonymous-inode"
74096c
+TEST $CLI volume set $V0 cluster.use-anonymous-inode no
74096c
+EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "^0$" afr_private_key_value $V0 $M0 0 "use-anonymous-inode"
74096c
+TEST $CLI volume set $V0 cluster.use-anonymous-inode yes
74096c
+EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "^1$" afr_private_key_value $V0 $M0 0 "use-anonymous-inode"
74096c
+TEST mkdir -p $M0/d1/b $M0/d2/a
74096c
+TEST kill_brick $V0 $H0 $B0/${V0}0
74096c
+TEST mv $M0/d2/a $M0/d1
74096c
+TEST mv $M0/d1/b $M0/d2
74096c
+TEST $CLI volume start $V0 force
74096c
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
74096c
+anon_inode_name=$(ls -a $B0/${V0}0 | grep glusterfs-anonymous-inode)
74096c
+TEST [[ -d $B0/${V0}1/$anon_inode_name ]]
74096c
+TEST [[ -d $B0/${V0}2/$anon_inode_name ]]
74096c
+anon_gfid=$(gf_get_gfid_xattr $B0/${V0}0/$anon_inode_name)
74096c
+EXPECT "$anon_gfid" gf_get_gfid_xattr $B0/${V0}1/$anon_inode_name
74096c
+EXPECT "$anon_gfid" gf_get_gfid_xattr $B0/${V0}2/$anon_inode_name
74096c
+
74096c
+TEST ! ls $M0/$anon_inode_name
74096c
+EXPECT "^4$" echo $(ls -a $M0 | wc -l)
74096c
+
74096c
+#Test purging code path by shd
74096c
+TEST $CLI volume heal $V0 disable
74096c
+TEST mkdir $M0/l0 $M0/l1 $M0/l2
74096c
+TEST touch $M0/del-file $M0/del-file-nolink $M0/l0/file
74096c
+TEST ln $M0/del-file $M0/del-file-link
74096c
+TEST ln $M0/l0/file $M0/l1/file-link1
74096c
+TEST ln $M0/l0/file $M0/l2/file-link2
74096c
+TEST mkdir -p $M0/del-recursive-dir/d1
74096c
+
74096c
+TEST kill_brick $V0 $H0 $B0/${V0}0
74096c
+TEST rm -f $M0/del-file $M0/del-file-nolink
74096c
+TEST rm -rf $M0/del-recursive-dir
74096c
+TEST mv $M0/d1/a $M0/d2
74096c
+TEST mv $M0/l0/file $M0/l0/renamed-file
74096c
+TEST $CLI volume start $V0 force
74096c
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status $V0 0
74096c
+
74096c
+nolink_gfid=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/${V0}0/del-file-nolink))
74096c
+link_gfid=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/${V0}0/del-file))
74096c
+dir_gfid=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/${V0}0/del-recursive-dir))
74096c
+rename_dir_gfid=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/${V0}0/d1/a))
74096c
+rename_file_gfid=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/${V0}0/l0/file))
74096c
+TEST ! stat $M0/del-file
74096c
+TEST stat $B0/${V0}0/$anon_inode_name/$link_gfid
74096c
+TEST ! stat $M0/del-file-nolink
74096c
+TEST ! stat $B0/${V0}0/$anon_inode_name/$nolink_gfid
74096c
+TEST ! stat $M0/del-recursive-dir
74096c
+TEST stat $B0/${V0}0/$anon_inode_name/$dir_gfid
74096c
+TEST ! stat $M0/d1/a
74096c
+TEST stat $B0/${V0}0/$anon_inode_name/$rename_dir_gfid
74096c
+TEST ! stat $M0/l0/file
74096c
+TEST stat $B0/${V0}0/$anon_inode_name/$rename_file_gfid
74096c
+
74096c
+TEST kill_brick $V0 $H0 $B0/${V0}1
74096c
+TEST mv $M0/l1/file-link1 $M0/l1/renamed-file-link1
74096c
+TEST $CLI volume start $V0 force
74096c
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status $V0 1
74096c
+TEST ! stat $M0/l1/file-link1
74096c
+TEST stat $B0/${V0}1/$anon_inode_name/$rename_file_gfid
74096c
+
74096c
+TEST kill_brick $V0 $H0 $B0/${V0}2
74096c
+TEST mv $M0/l2/file-link2 $M0/l2/renamed-file-link2
74096c
+TEST $CLI volume start $V0 force
74096c
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status $V0 2
74096c
+TEST ! stat $M0/l2/file-link2
74096c
+TEST stat $B0/${V0}2/$anon_inode_name/$rename_file_gfid
74096c
+
74096c
+#Simulate only anon-inodes present in all bricks
74096c
+TEST rm -f $M0/l0/renamed-file $M0/l1/renamed-file-link1 $M0/l2/renamed-file-link2
74096c
+
74096c
+#Test that shd doesn't clean up anon-inodes when some bricks are down
74096c
+TEST kill_brick $V0 $H0 $B0/${V0}1
74096c
+TEST $CLI volume heal $V0 enable
74096c
+$CLI volume heal $V0
74096c
+sleep 5 #Allow time for completion of one scan
74096c
+TEST stat $B0/${V0}0/$anon_inode_name/$link_gfid
74096c
+TEST stat $B0/${V0}0/$anon_inode_name/$rename_dir_gfid
74096c
+TEST stat $B0/${V0}0/$anon_inode_name/$dir_gfid
74096c
+rename_dir_inum=$(STAT_INO $B0/${V0}0/$anon_inode_name/$rename_dir_gfid)
74096c
+
74096c
+TEST $CLI volume start $V0 force
74096c
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status $V0 1
74096c
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
74096c
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" afr_anon_entry_count $B0/${V0}0
74096c
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" afr_anon_entry_count $B0/${V0}1
74096c
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" afr_anon_entry_count $B0/${V0}2
74096c
+
74096c
+#Test that rename indeed happened instead of rmdir/mkdir
74096c
+renamed_dir_inum=$(STAT_INO $B0/${V0}0/d2/a)
74096c
+EXPECT "$rename_dir_inum" echo $renamed_dir_inum
74096c
+cleanup;
74096c
diff --git a/tests/basic/afr/entry-self-heal-anon-dir-off.t b/tests/basic/afr/entry-self-heal-anon-dir-off.t
74096c
new file mode 100644
74096c
index 0000000..0803a08
74096c
--- /dev/null
74096c
+++ b/tests/basic/afr/entry-self-heal-anon-dir-off.t
74096c
@@ -0,0 +1,464 @@
74096c
+#!/bin/bash
74096c
+
74096c
+#This file checks if missing entry self-heal and entry self-heal are working
74096c
+#as expected.
74096c
+. $(dirname $0)/../../include.rc
74096c
+. $(dirname $0)/../../volume.rc
74096c
+. $(dirname $0)/../../afr.rc
74096c
+
74096c
+cleanup;
74096c
+
74096c
+function get_file_type {
74096c
+        stat -c "%a:%F:%g:%t:%T:%u" $1
74096c
+}
74096c
+
74096c
+function diff_dirs {
74096c
+        diff <(ls $1 | sort) <(ls $2 | sort)
74096c
+}
74096c
+
74096c
+function heal_status {
74096c
+        local f1_path="${1}/${3}"
74096c
+        local f2_path="${2}/${3}"
74096c
+        local insync=""
74096c
+        diff_dirs $f1_path $f2_path
74096c
+        if [ $? -eq 0 ];
74096c
+        then
74096c
+                insync="Y"
74096c
+        else
74096c
+                insync="N"
74096c
+        fi
74096c
+        local xattr11=$(get_hex_xattr trusted.afr.$V0-client-0 $f1_path)
74096c
+        local xattr12=$(get_hex_xattr trusted.afr.$V0-client-1 $f1_path)
74096c
+        local xattr21=$(get_hex_xattr trusted.afr.$V0-client-0 $f2_path)
74096c
+        local xattr22=$(get_hex_xattr trusted.afr.$V0-client-1 $f2_path)
74096c
+        local dirty1=$(get_hex_xattr trusted.afr.dirty $f1_path)
74096c
+        local dirty2=$(get_hex_xattr trusted.afr.dirty $f2_path)
74096c
+        if [ -z $xattr11 ]; then xattr11="000000000000000000000000"; fi
74096c
+        if [ -z $xattr12 ]; then xattr12="000000000000000000000000"; fi
74096c
+        if [ -z $xattr21 ]; then xattr21="000000000000000000000000"; fi
74096c
+        if [ -z $xattr22 ]; then xattr22="000000000000000000000000"; fi
74096c
+        if [ -z $dirty1 ]; then dirty1="000000000000000000000000"; fi
74096c
+        if [ -z $dirty2 ]; then dirty2="000000000000000000000000"; fi
74096c
+        echo ${insync}${xattr11}${xattr12}${xattr21}${xattr22}${dirty1}${dirty2}
74096c
+}
74096c
+
74096c
+function is_heal_done {
74096c
+        local zero_xattr="000000000000000000000000"
74096c
+        if [ "$(heal_status $@)" == "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" ];
74096c
+        then
74096c
+                echo "Y"
74096c
+        else
74096c
+                echo "N"
74096c
+        fi
74096c
+}
74096c
+
74096c
+function print_pending_heals {
74096c
+        local result=":"
74096c
+        for i in "$@";
74096c
+        do
74096c
+                if [ "N" == $(is_heal_done $B0/${V0}0 $B0/${V0}1 $i) ];
74096c
+                then
74096c
+                        result="$result:$i"
74096c
+                fi
74096c
+        done
74096c
+#To prevent any match for EXPECT_WITHIN, print a char non-existent in file-names
74096c
+        if [ $result == ":" ]; then result="~"; fi
74096c
+        echo $result
74096c
+}
74096c
+
74096c
+zero_xattr="000000000000000000000000"
74096c
+TEST glusterd
74096c
+TEST pidof glusterd
74096c
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
74096c
+TEST $CLI volume heal $V0 disable
74096c
+TEST $CLI volume set $V0 cluster.use-anonymous-inode off
74096c
+TEST $CLI volume set $V0 performance.write-behind off
74096c
+TEST $CLI volume set $V0 performance.read-ahead off
74096c
+TEST $CLI volume set $V0 performance.readdir-ahead off
74096c
+TEST $CLI volume set $V0 performance.open-behind off
74096c
+TEST $CLI volume set $V0 performance.stat-prefetch off
74096c
+TEST $CLI volume set $V0 performance.io-cache off
74096c
+TEST $CLI volume set $V0 performance.quick-read off
74096c
+TEST $CLI volume set $V0 cluster.data-self-heal on
74096c
+TEST $CLI volume set $V0 cluster.metadata-self-heal on
74096c
+TEST $CLI volume set $V0 cluster.entry-self-heal on
74096c
+TEST $CLI volume start $V0
74096c
+
74096c
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 --use-readdirp=no $M0
74096c
+cd $M0
74096c
+#_me_ is dir on which missing entry self-heal happens, _heal is where dir self-heal happens
74096c
+#spb is split-brain, fool is all fool
74096c
+
74096c
+#source_self_accusing means there exists source and a sink which self-accuses.
74096c
+#This simulates failures where fops failed on the bricks without it going down.
74096c
+#Something like EACCES/EDQUOT etc.
74096c
+
74096c
+TEST mkdir spb_heal spb spb_me_heal spb_me fool_heal fool_me v1_fool_heal v1_fool_me source_creations_heal source_deletions_heal source_creations_me source_deletions_me v1_dirty_me v1_dirty_heal source_self_accusing
74096c
+TEST mkfifo source_deletions_heal/fifo
74096c
+TEST mknod  source_deletions_heal/block b 4 5
74096c
+TEST mknod  source_deletions_heal/char c 1 5
74096c
+TEST touch  source_deletions_heal/file
74096c
+TEST ln -s  source_deletions_heal/file source_deletions_heal/slink
74096c
+TEST mkdir  source_deletions_heal/dir1
74096c
+TEST mkdir  source_deletions_heal/dir1/dir2
74096c
+
74096c
+TEST mkfifo source_deletions_me/fifo
74096c
+TEST mknod  source_deletions_me/block b 4 5
74096c
+TEST mknod  source_deletions_me/char c 1 5
74096c
+TEST touch  source_deletions_me/file
74096c
+TEST ln -s  source_deletions_me/file source_deletions_me/slink
74096c
+TEST mkdir  source_deletions_me/dir1
74096c
+TEST mkdir  source_deletions_me/dir1/dir2
74096c
+
74096c
+TEST mkfifo source_self_accusing/fifo
74096c
+TEST mknod  source_self_accusing/block b 4 5
74096c
+TEST mknod  source_self_accusing/char c 1 5
74096c
+TEST touch  source_self_accusing/file
74096c
+TEST ln -s  source_self_accusing/file source_self_accusing/slink
74096c
+TEST mkdir  source_self_accusing/dir1
74096c
+TEST mkdir  source_self_accusing/dir1/dir2
74096c
+
74096c
+TEST kill_brick $V0 $H0 $B0/${V0}0
74096c
+
74096c
+TEST touch spb_heal/0 spb/0 spb_me_heal/0 spb_me/0 fool_heal/0 fool_me/0 v1_fool_heal/0 v1_fool_me/0 v1_dirty_heal/0 v1_dirty_me/0
74096c
+TEST rm -rf source_deletions_heal/fifo source_deletions_heal/block source_deletions_heal/char source_deletions_heal/file source_deletions_heal/slink source_deletions_heal/dir1
74096c
+TEST rm -rf source_deletions_me/fifo source_deletions_me/block source_deletions_me/char source_deletions_me/file source_deletions_me/slink source_deletions_me/dir1
74096c
+TEST rm -rf source_self_accusing/fifo source_self_accusing/block source_self_accusing/char source_self_accusing/file source_self_accusing/slink source_self_accusing/dir1
74096c
+
74096c
+#Test that the files are deleted
74096c
+TEST ! stat $B0/${V0}1/source_deletions_heal/fifo
74096c
+TEST ! stat $B0/${V0}1/source_deletions_heal/block
74096c
+TEST ! stat $B0/${V0}1/source_deletions_heal/char
74096c
+TEST ! stat $B0/${V0}1/source_deletions_heal/file
74096c
+TEST ! stat $B0/${V0}1/source_deletions_heal/slink
74096c
+TEST ! stat $B0/${V0}1/source_deletions_heal/dir1
74096c
+TEST ! stat $B0/${V0}1/source_deletions_me/fifo
74096c
+TEST ! stat $B0/${V0}1/source_deletions_me/block
74096c
+TEST ! stat $B0/${V0}1/source_deletions_me/char
74096c
+TEST ! stat $B0/${V0}1/source_deletions_me/file
74096c
+TEST ! stat $B0/${V0}1/source_deletions_me/slink
74096c
+TEST ! stat $B0/${V0}1/source_deletions_me/dir1
74096c
+TEST ! stat $B0/${V0}1/source_self_accusing/fifo
74096c
+TEST ! stat $B0/${V0}1/source_self_accusing/block
74096c
+TEST ! stat $B0/${V0}1/source_self_accusing/char
74096c
+TEST ! stat $B0/${V0}1/source_self_accusing/file
74096c
+TEST ! stat $B0/${V0}1/source_self_accusing/slink
74096c
+TEST ! stat $B0/${V0}1/source_self_accusing/dir1
74096c
+
74096c
+
74096c
+TEST mkfifo source_creations_heal/fifo
74096c
+TEST mknod  source_creations_heal/block b 4 5
74096c
+TEST mknod  source_creations_heal/char c 1 5
74096c
+TEST touch  source_creations_heal/file
74096c
+TEST ln -s  source_creations_heal/file source_creations_heal/slink
74096c
+TEST mkdir  source_creations_heal/dir1
74096c
+TEST mkdir  source_creations_heal/dir1/dir2
74096c
+
74096c
+TEST mkfifo source_creations_me/fifo
74096c
+TEST mknod  source_creations_me/block b 4 5
74096c
+TEST mknod  source_creations_me/char c 1 5
74096c
+TEST touch  source_creations_me/file
74096c
+TEST ln -s  source_creations_me/file source_creations_me/slink
74096c
+TEST mkdir  source_creations_me/dir1
74096c
+TEST mkdir  source_creations_me/dir1/dir2
74096c
+
74096c
+$CLI volume stop $V0
74096c
+
74096c
+#simulate fool fool scenario for fool_* dirs
74096c
+setfattr -x trusted.afr.$V0-client-0 $B0/${V0}1/{fool_heal,fool_me}
74096c
+setfattr -n trusted.afr.dirty -v 0x000000000000000000000001 $B0/${V0}1/{fool_heal,fool_me}
74096c
+setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000000000001 $B0/${V0}1/{v1_fool_heal,v1_fool_me}
74096c
+
74096c
+#Simulate v1-dirty (self-accusing but no pending ops on others) scenario for v1_dirty_* dirs
74096c
+setfattr -x trusted.afr.$V0-client-0 $B0/${V0}1/v1_dirty_{heal,me}
74096c
+setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000000000001 $B0/${V0}1/v1_dirty_{heal,me}
74096c
+
74096c
+$CLI volume start $V0 force
74096c
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
74096c
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
74096c
+TEST kill_brick $V0 $H0 $B0/${V0}1
74096c
+
74096c
+TEST touch spb_heal/1 spb/0 spb_me_heal/1 spb_me/0 fool_heal/1 fool_me/1 v1_fool_heal/1 v1_fool_me/1
74096c
+
74096c
+$CLI volume stop $V0
74096c
+
74096c
+#simulate fool fool scenario for fool_* dirs
74096c
+setfattr -x trusted.afr.$V0-client-1 $B0/${V0}0/{fool_heal,fool_me}
74096c
+setfattr -n trusted.afr.dirty -v 0x000000000000000000000001 $B0/${V0}1/{fool_heal,fool_me}
74096c
+setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000000000001 $B0/${V0}1/{v1_fool_heal,v1_fool_me}
74096c
+
74096c
+#simulate self-accusing for source_self_accusing
74096c
+TEST setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000000000006 $B0/${V0}0/source_self_accusing
74096c
+
74096c
+$CLI volume start $V0 force
74096c
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
74096c
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
74096c
+
74096c
+# Check if conservative merges happened correctly on _me_ dirs
74096c
+TEST stat spb_me_heal/1
74096c
+TEST stat $B0/${V0}0/spb_me_heal/1
74096c
+TEST stat $B0/${V0}1/spb_me_heal/1
74096c
+
74096c
+TEST stat spb_me_heal/0
74096c
+TEST stat $B0/${V0}0/spb_me_heal/0
74096c
+TEST stat $B0/${V0}1/spb_me_heal/0
74096c
+
74096c
+TEST stat fool_me/1
74096c
+TEST stat $B0/${V0}0/fool_me/1
74096c
+TEST stat $B0/${V0}1/fool_me/1
74096c
+
74096c
+TEST stat fool_me/0
74096c
+TEST stat $B0/${V0}0/fool_me/0
74096c
+TEST stat $B0/${V0}1/fool_me/0
74096c
+
74096c
+TEST stat v1_fool_me/0
74096c
+TEST stat $B0/${V0}0/v1_fool_me/0
74096c
+TEST stat $B0/${V0}1/v1_fool_me/0
74096c
+
74096c
+TEST stat v1_fool_me/1
74096c
+TEST stat $B0/${V0}0/v1_fool_me/1
74096c
+TEST stat $B0/${V0}1/v1_fool_me/1
74096c
+
74096c
+TEST stat v1_dirty_me/0
74096c
+TEST stat $B0/${V0}0/v1_dirty_me/0
74096c
+TEST stat $B0/${V0}1/v1_dirty_me/0
74096c
+
74096c
+#Check if files that have gfid-mismatches in _me_ are giving EIO
74096c
+TEST ! stat spb_me/0
74096c
+
74096c
+#Check if stale files are deleted on access
74096c
+TEST ! stat source_deletions_me/fifo
74096c
+TEST ! stat $B0/${V0}0/source_deletions_me/fifo
74096c
+TEST ! stat $B0/${V0}1/source_deletions_me/fifo
74096c
+TEST ! stat source_deletions_me/block
74096c
+TEST ! stat $B0/${V0}0/source_deletions_me/block
74096c
+TEST ! stat $B0/${V0}1/source_deletions_me/block
74096c
+TEST ! stat source_deletions_me/char
74096c
+TEST ! stat $B0/${V0}0/source_deletions_me/char
74096c
+TEST ! stat $B0/${V0}1/source_deletions_me/char
74096c
+TEST ! stat source_deletions_me/file
74096c
+TEST ! stat $B0/${V0}0/source_deletions_me/file
74096c
+TEST ! stat $B0/${V0}1/source_deletions_me/file
74096c
+TEST ! stat source_deletions_me/file
74096c
+TEST ! stat $B0/${V0}0/source_deletions_me/file
74096c
+TEST ! stat $B0/${V0}1/source_deletions_me/file
74096c
+TEST ! stat source_deletions_me/dir1/dir2
74096c
+TEST ! stat $B0/${V0}0/source_deletions_me/dir1/dir2
74096c
+TEST ! stat $B0/${V0}1/source_deletions_me/dir1/dir2
74096c
+TEST ! stat source_deletions_me/dir1
74096c
+TEST ! stat $B0/${V0}0/source_deletions_me/dir1
74096c
+TEST ! stat $B0/${V0}1/source_deletions_me/dir1
74096c
+
74096c
+#Test if the files created as part of access are healed correctly
74096c
+r=$(get_file_type source_creations_me/fifo)
74096c
+EXPECT "$r" get_file_type $B0/${V0}0/source_creations_me/fifo
74096c
+EXPECT "$r" get_file_type $B0/${V0}1/source_creations_me/fifo
74096c
+TEST [ -p source_creations_me/fifo ]
74096c
+
74096c
+r=$(get_file_type source_creations_me/block)
74096c
+EXPECT "$r" get_file_type $B0/${V0}0/source_creations_me/block
74096c
+EXPECT "$r" get_file_type $B0/${V0}1/source_creations_me/block
74096c
+EXPECT "^4 5$" stat -c "%t %T" $B0/${V0}1/source_creations_me/block
74096c
+EXPECT "^4 5$" stat -c "%t %T" $B0/${V0}0/source_creations_me/block
74096c
+TEST [ -b source_creations_me/block ]
74096c
+
74096c
+r=$(get_file_type source_creations_me/char)
74096c
+EXPECT "$r" get_file_type $B0/${V0}0/source_creations_me/char
74096c
+EXPECT "$r" get_file_type $B0/${V0}1/source_creations_me/char
74096c
+EXPECT "^1 5$" stat -c "%t %T" $B0/${V0}1/source_creations_me/char
74096c
+EXPECT "^1 5$" stat -c "%t %T" $B0/${V0}0/source_creations_me/char
74096c
+TEST [ -c source_creations_me/char ]
74096c
+
74096c
+r=$(get_file_type source_creations_me/file)
74096c
+EXPECT "$r" get_file_type $B0/${V0}0/source_creations_me/file
74096c
+EXPECT "$r" get_file_type $B0/${V0}1/source_creations_me/file
74096c
+TEST [ -f source_creations_me/file ]
74096c
+
74096c
+r=$(get_file_type source_creations_me/slink)
74096c
+EXPECT "$r" get_file_type $B0/${V0}0/source_creations_me/slink
74096c
+EXPECT "$r" get_file_type $B0/${V0}1/source_creations_me/slink
74096c
+TEST [ -h source_creations_me/slink ]
74096c
+
74096c
+r=$(get_file_type source_creations_me/dir1/dir2)
74096c
+EXPECT "$r" get_file_type $B0/${V0}0/source_creations_me/dir1/dir2
74096c
+EXPECT "$r" get_file_type $B0/${V0}1/source_creations_me/dir1/dir2
74096c
+TEST [ -d source_creations_me/dir1/dir2 ]
74096c
+
74096c
+r=$(get_file_type source_creations_me/dir1)
74096c
+EXPECT "$r" get_file_type $B0/${V0}0/source_creations_me/dir1
74096c
+EXPECT "$r" get_file_type $B0/${V0}1/source_creations_me/dir1
74096c
+TEST [ -d source_creations_me/dir1 ]
74096c
+
74096c
+#Trigger heal and check _heal dirs are healed properly
74096c
+#Trigger change in event generation number. That way inodes would get refreshed during lookup
74096c
+TEST kill_brick $V0 $H0 $B0/${V0}1
74096c
+$CLI volume start $V0 force
74096c
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
74096c
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
74096c
+
74096c
+TEST stat spb_heal
74096c
+TEST stat spb_me_heal
74096c
+TEST stat fool_heal
74096c
+TEST stat fool_me
74096c
+TEST stat v1_fool_heal
74096c
+TEST stat v1_fool_me
74096c
+TEST stat source_deletions_heal
74096c
+TEST stat source_deletions_me
74096c
+TEST stat source_self_accusing
74096c
+TEST stat source_creations_heal
74096c
+TEST stat source_creations_me
74096c
+TEST stat v1_dirty_heal
74096c
+TEST stat v1_dirty_me
74096c
+TEST $CLI volume stop $V0
74096c
+TEST rm -rf $B0/${V0}{0,1}/.glusterfs/indices/xattrop/*
74096c
+
74096c
+$CLI volume start $V0
74096c
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
74096c
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
74096c
+
74096c
+#Create base entry in indices/xattrop
74096c
+echo "Data" > $M0/FILE
74096c
+rm -f $M0/FILE
74096c
+EXPECT "1" count_index_entries $B0/${V0}0
74096c
+EXPECT "1" count_index_entries $B0/${V0}1
74096c
+
74096c
+TEST $CLI volume stop $V0;
74096c
+
74096c
+#Create entries for fool_heal and fool_me to ensure they are fully healed and dirty xattrs erased, before triggering index heal
74096c
+create_brick_xattrop_entry $B0/${V0}0 fool_heal fool_me source_creations_heal/dir1
74096c
+
74096c
+$CLI volume start $V0
74096c
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
74096c
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
74096c
+
74096c
+$CLI volume heal $V0 enable
74096c
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
74096c
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
74096c
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
74096c
+
74096c
+TEST $CLI volume heal $V0;
74096c
+EXPECT_WITHIN $HEAL_TIMEOUT "~" print_pending_heals spb_heal spb_me_heal fool_heal fool_me v1_fool_heal v1_fool_me source_deletions_heal source_deletions_me source_creations_heal source_creations_me v1_dirty_heal v1_dirty_me source_self_accusing
74096c
+
74096c
+EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 spb_heal
74096c
+EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 spb_me_heal
74096c
+EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 fool_heal
74096c
+EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 fool_me
74096c
+EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 v1_fool_heal
74096c
+EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 v1_fool_me
74096c
+EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 source_deletions_heal
74096c
+EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 source_deletions_me
74096c
+EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 source_self_accusing
74096c
+EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 source_creations_heal
74096c
+EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 source_creations_me
74096c
+EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 v1_dirty_heal
74096c
+EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 v1_dirty_me
74096c
+
74096c
+#Don't access the files/dirs from mount point as that may cause self-heals
74096c
+# Check if conservative merges happened correctly on heal dirs
74096c
+TEST stat $B0/${V0}0/spb_heal/1
74096c
+TEST stat $B0/${V0}1/spb_heal/1
74096c
+
74096c
+TEST stat $B0/${V0}0/spb_heal/0
74096c
+TEST stat $B0/${V0}1/spb_heal/0
74096c
+
74096c
+TEST stat $B0/${V0}0/fool_heal/1
74096c
+TEST stat $B0/${V0}1/fool_heal/1
74096c
+
74096c
+TEST stat $B0/${V0}0/fool_heal/0
74096c
+TEST stat $B0/${V0}1/fool_heal/0
74096c
+
74096c
+TEST stat $B0/${V0}0/v1_fool_heal/0
74096c
+TEST stat $B0/${V0}1/v1_fool_heal/0
74096c
+
74096c
+TEST stat $B0/${V0}0/v1_fool_heal/1
74096c
+TEST stat $B0/${V0}1/v1_fool_heal/1
74096c
+
74096c
+TEST stat $B0/${V0}0/v1_dirty_heal/0
74096c
+TEST stat $B0/${V0}1/v1_dirty_heal/0
74096c
+
74096c
+#Check if files that have gfid-mismatches in spb are giving EIO
74096c
+TEST ! stat spb/0
74096c
+
74096c
+#Check if stale files are deleted on access
74096c
+TEST ! stat $B0/${V0}0/source_deletions_heal/fifo
74096c
+TEST ! stat $B0/${V0}1/source_deletions_heal/fifo
74096c
+TEST ! stat $B0/${V0}0/source_deletions_heal/block
74096c
+TEST ! stat $B0/${V0}1/source_deletions_heal/block
74096c
+TEST ! stat $B0/${V0}0/source_deletions_heal/char
74096c
+TEST ! stat $B0/${V0}1/source_deletions_heal/char
74096c
+TEST ! stat $B0/${V0}0/source_deletions_heal/file
74096c
+TEST ! stat $B0/${V0}1/source_deletions_heal/file
74096c
+TEST ! stat $B0/${V0}0/source_deletions_heal/file
74096c
+TEST ! stat $B0/${V0}1/source_deletions_heal/file
74096c
+TEST ! stat $B0/${V0}0/source_deletions_heal/dir1/dir2
74096c
+TEST ! stat $B0/${V0}1/source_deletions_heal/dir1/dir2
74096c
+TEST ! stat $B0/${V0}0/source_deletions_heal/dir1
74096c
+TEST ! stat $B0/${V0}1/source_deletions_heal/dir1
74096c
+
74096c
+#Check if stale files are deleted on access
74096c
+TEST ! stat $B0/${V0}0/source_self_accusing/fifo
74096c
+TEST ! stat $B0/${V0}1/source_self_accusing/fifo
74096c
+TEST ! stat $B0/${V0}0/source_self_accusing/block
74096c
+TEST ! stat $B0/${V0}1/source_self_accusing/block
74096c
+TEST ! stat $B0/${V0}0/source_self_accusing/char
74096c
+TEST ! stat $B0/${V0}1/source_self_accusing/char
74096c
+TEST ! stat $B0/${V0}0/source_self_accusing/file
74096c
+TEST ! stat $B0/${V0}1/source_self_accusing/file
74096c
+TEST ! stat $B0/${V0}0/source_self_accusing/file
74096c
+TEST ! stat $B0/${V0}1/source_self_accusing/file
74096c
+TEST ! stat $B0/${V0}0/source_self_accusing/dir1/dir2
74096c
+TEST ! stat $B0/${V0}1/source_self_accusing/dir1/dir2
74096c
+TEST ! stat $B0/${V0}0/source_self_accusing/dir1
74096c
+TEST ! stat $B0/${V0}1/source_self_accusing/dir1
74096c
+
74096c
+#Test if the files are created correctly as part of full self-heal
74096c
+r=$(get_file_type $B0/${V0}0/source_creations_heal/fifo)
74096c
+EXPECT "$r" get_file_type $B0/${V0}1/source_creations_heal/fifo
74096c
+TEST [ -p $B0/${V0}0/source_creations_heal/fifo ]
74096c
+EXPECT "^4 5$" stat -c "%t %T" $B0/${V0}1/source_creations_heal/block
74096c
+EXPECT "^4 5$" stat -c "%t %T" $B0/${V0}0/source_creations_heal/block
74096c
+
74096c
+r=$(get_file_type $B0/${V0}0/source_creations_heal/block)
74096c
+EXPECT "$r" get_file_type $B0/${V0}1/source_creations_heal/block
74096c
+
74096c
+r=$(get_file_type $B0/${V0}0/source_creations_heal/char)
74096c
+EXPECT "$r" get_file_type $B0/${V0}1/source_creations_heal/char
74096c
+EXPECT "^1 5$" stat -c "%t %T" $B0/${V0}1/source_creations_heal/char
74096c
+EXPECT "^1 5$" stat -c "%t %T" $B0/${V0}0/source_creations_heal/char
74096c
+
74096c
+r=$(get_file_type $B0/${V0}0/source_creations_heal/file)
74096c
+EXPECT "$r" get_file_type $B0/${V0}1/source_creations_heal/file
74096c
+TEST [ -f $B0/${V0}0/source_creations_heal/file ]
74096c
+
74096c
+r=$(get_file_type $B0/${V0}0/source_creations_heal/slink)
74096c
+EXPECT "$r" get_file_type $B0/${V0}1/source_creations_heal/slink
74096c
+TEST [ -h $B0/${V0}0/source_creations_heal/slink ]
74096c
+
74096c
+r=$(get_file_type $B0/${V0}0/source_creations_heal/dir1/dir2)
74096c
+EXPECT "$r" get_file_type $B0/${V0}1/source_creations_heal/dir1/dir2
74096c
+TEST [ -d $B0/${V0}0/source_creations_heal/dir1/dir2 ]
74096c
+
74096c
+r=$(get_file_type $B0/${V0}0/source_creations_heal/dir1)
74096c
+EXPECT "$r" get_file_type $B0/${V0}1/source_creations_heal/dir1
74096c
+TEST [ -d $B0/${V0}0/source_creations_heal/dir1 ]
74096c
+
74096c
+cd -
74096c
+
74096c
+#Anonymous directory shouldn't be created
74096c
+TEST mkdir $M0/rename-dir
74096c
+before_rename=$(STAT_INO $B0/${V0}1/rename-dir)
74096c
+TEST kill_brick $V0 $H0 $B0/${V0}1
74096c
+TEST mv $M0/rename-dir $M0/new-name
74096c
+TEST $CLI volume start $V0 force
74096c
+#Since features.ctime is not enabled by default in downstream, the below test
74096c
+#will fail. If ctime feature is enabled, there will be trusted.glusterfs.mdata
74096c
+#xattr set which will differ for the parent in the gfid split-brain scenario
74096c
+#and when lookup is triggered, the gfid gets added to indices/xattrop leading
74096c
+#the below test to pass in upstream. Hence commenting it here.
74096c
+#'spb' is in split-brain so pending-heal-count will be 2
74096c
+#EXPECT_WITHIN $HEAL_TIMEOUT "^2$" get_pending_heal_count $V0
74096c
+after_rename=$(STAT_INO $B0/${V0}1/new-name)
74096c
+EXPECT "0" echo $(ls -a $B0/${V0}0/ | grep anonymous-inode | wc -l)
74096c
+EXPECT "0" echo $(ls -a $B0/${V0}1/ | grep anonymous-inode | wc -l)
74096c
+EXPECT_NOT "$before_rename" echo $after_rename
74096c
+cleanup
74096c
diff --git a/tests/basic/afr/rename-data-loss.t b/tests/basic/afr/rename-data-loss.t
74096c
new file mode 100644
74096c
index 0000000..256ee2a
74096c
--- /dev/null
74096c
+++ b/tests/basic/afr/rename-data-loss.t
74096c
@@ -0,0 +1,72 @@
74096c
+#!/bin/bash
74096c
+#Self-heal tests
74096c
+. $(dirname $0)/../../include.rc
74096c
+. $(dirname $0)/../../volume.rc
74096c
+. $(dirname $0)/../../afr.rc
74096c
+
74096c
+cleanup;
74096c
+
74096c
+TEST glusterd
74096c
+TEST pidof glusterd
74096c
+TEST $CLI volume create $V0 replica 2 $H0:$B0/brick{0,1}
74096c
+TEST $CLI volume set $V0 write-behind off
74096c
+TEST $CLI volume set $V0 self-heal-daemon off
74096c
+TEST $CLI volume set $V0 data-self-heal off
74096c
+TEST $CLI volume set $V0 metadata-self-heal off
74096c
+TEST $CLI volume set $V0 entry-self-heal off
74096c
+TEST $CLI volume start $V0
74096c
+EXPECT 'Started' volinfo_field $V0 'Status'
74096c
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0;
74096c
+
74096c
+cd $M0
74096c
+TEST `echo "line1" >> file1`
74096c
+TEST mkdir dir1
74096c
+TEST mkdir dir2
74096c
+TEST mkdir -p dir1/dira/dirb
74096c
+TEST `echo "line1">>dir1/dira/dirb/file1`
74096c
+TEST mkdir delete_me
74096c
+TEST `echo "line1" >> delete_me/file1`
74096c
+
74096c
+#brick0 has witnessed the second write while brick1 is down.
74096c
+TEST kill_brick $V0 $H0 $B0/brick1
74096c
+TEST `echo "line2" >> file1`
74096c
+TEST `echo "line2" >> dir1/dira/dirb/file1`
74096c
+TEST `echo "line2" >> delete_me/file1`
74096c
+
74096c
+#Toggle the bricks that are up/down.
74096c
+TEST $CLI volume start $V0 force
74096c
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1
74096c
+TEST kill_brick $V0 $H0 $B0/brick0
74096c
+
74096c
+#Rename when the 'source' brick0 for data-selfheals is down.
74096c
+mv file1 file2
74096c
+mv dir1/dira dir2
74096c
+
74096c
+#Delete a dir when brick0 is down.
74096c
+rm -rf delete_me
74096c
+cd -
74096c
+
74096c
+#Bring everything up and trigger heal
74096c
+TEST $CLI volume set $V0 self-heal-daemon on
74096c
+TEST $CLI volume start $V0 force
74096c
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
74096c
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
74096c
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
74096c
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
74096c
+TEST $CLI volume heal $V0
74096c
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
74096c
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" afr_anon_entry_count $B0/brick0
74096c
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" afr_anon_entry_count $B0/brick1
74096c
+
74096c
+#Remount to avoid reading from caches
74096c
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
74096c
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0;
74096c
+EXPECT "line2" tail -1 $M0/file2
74096c
+EXPECT "line2" tail -1 $M0/dir2/dira/dirb/file1
74096c
+TEST ! stat $M0/delete_me/file1
74096c
+TEST ! stat $M0/delete_me
74096c
+
74096c
+anon_inode_name=$(ls -a $B0/brick0 | grep glusterfs-anonymous-inode)
74096c
+TEST [[ -d $B0/brick0/$anon_inode_name ]]
74096c
+TEST [[ -d $B0/brick1/$anon_inode_name ]]
74096c
+cleanup
74096c
diff --git a/tests/bugs/replicate/bug-1744548-heal-timeout.t b/tests/bugs/replicate/bug-1744548-heal-timeout.t
74096c
index c208112..0115350 100644
74096c
--- a/tests/bugs/replicate/bug-1744548-heal-timeout.t
74096c
+++ b/tests/bugs/replicate/bug-1744548-heal-timeout.t
74096c
@@ -25,14 +25,14 @@ TEST ! $CLI volume heal $V0
74096c
 TEST $CLI volume profile $V0 start
74096c
 TEST $CLI volume profile $V0 info clear
74096c
 TEST $CLI volume heal $V0 enable
74096c
-# Each brick does 3 opendirs, corresponding to dirty, xattrop and entry-changes
74096c
-EXPECT_WITHIN $HEAL_TIMEOUT "^333$" get_cumulative_opendir_count
74096c
+# Each brick does 4 opendirs, corresponding to dirty, xattrop, entry-changes and anonymous-inode
74096c
+EXPECT_WITHIN 4 "^444$" get_cumulative_opendir_count
74096c
 
74096c
 # Check that a change in heal-timeout is honoured immediately.
74096c
 TEST $CLI volume set $V0 cluster.heal-timeout 5
74096c
 sleep 10
74096c
 # Two crawls must have happened.
74096c
-EXPECT_WITHIN $HEAL_TIMEOUT "^999$" get_cumulative_opendir_count
74096c
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^121212$" get_cumulative_opendir_count
74096c
 
74096c
 # shd must not heal if it is disabled and heal-timeout is changed.
74096c
 TEST $CLI volume heal $V0 disable
74096c
diff --git a/tests/features/trash.t b/tests/features/trash.t
74096c
index 472e909..da5b50b 100755
74096c
--- a/tests/features/trash.t
74096c
+++ b/tests/features/trash.t
74096c
@@ -94,105 +94,105 @@ wildcard_not_exists() {
74096c
         if [ $? -eq 0 ]; then echo "Y"; else echo "N"; fi
74096c
 }
74096c
 
74096c
-# testing glusterd [1-3]
74096c
+# testing glusterd
74096c
 TEST glusterd
74096c
 TEST pidof glusterd
74096c
 TEST $CLI volume info
74096c
 
74096c
-# creating distributed volume [4]
74096c
+# creating distributed volume
74096c
 TEST $CLI volume create $V0 $H0:$B0/${V0}{1,2}
74096c
 
74096c
-# checking volume status [5-7]
74096c
+# checking volume status
74096c
 EXPECT "$V0" volinfo_field $V0 'Volume Name'
74096c
 EXPECT 'Created' volinfo_field $V0 'Status'
74096c
 EXPECT '2' brick_count $V0
74096c
 
74096c
-# test without enabling trash translator [8]
74096c
+# test without enabling trash translator
74096c
 TEST start_vol $V0 $M0
74096c
 
74096c
-# test on enabling trash translator [9-10]
74096c
+# test on enabling trash translator
74096c
 TEST $CLI volume set $V0 features.trash on
74096c
 EXPECT 'on' volinfo_field $V0 'features.trash'
74096c
 
74096c
-# files directly under mount point [11]
74096c
+# files directly under mount point
74096c
 create_files $M0/file1 $M0/file2
74096c
 TEST file_exists $V0 file1 file2
74096c
 
74096c
-# perform unlink [12]
74096c
+# perform unlink
74096c
 TEST unlink_op file1
74096c
 
74096c
-# perform truncate [13]
74096c
+# perform truncate
74096c
 TEST truncate_op file2 4
74096c
 
74096c
-# create files directory hierarchy and check [14]
74096c
+# create files directory hierarchy and check
74096c
 mkdir -p $M0/1/2/3
74096c
 create_files $M0/1/2/3/foo1 $M0/1/2/3/foo2
74096c
 TEST file_exists $V0 1/2/3/foo1 1/2/3/foo2
74096c
 
74096c
-# perform unlink [15]
74096c
+# perform unlink
74096c
 TEST unlink_op 1/2/3/foo1
74096c
 
74096c
-# perform truncate [16]
74096c
+# perform truncate
74096c
 TEST truncate_op 1/2/3/foo2 4
74096c
 
74096c
 # create a directory for eliminate pattern
74096c
 mkdir $M0/a
74096c
 
74096c
-# set the eliminate pattern [17-18]
74096c
+# set the eliminate pattern
74096c
 TEST $CLI volume set $V0 features.trash-eliminate-path /a
74096c
 EXPECT '/a' volinfo_field $V0 'features.trash-eliminate-path'
74096c
 
74096c
-# create two files and check [19]
74096c
+# create two files and check
74096c
 create_files $M0/a/test1 $M0/a/test2
74096c
 TEST file_exists $V0 a/test1 a/test2
74096c
 
74096c
-# remove from eliminate pattern [20]
74096c
+# remove from eliminate pattern
74096c
 rm -f $M0/a/test1
74096c
 EXPECT "Y" wildcard_not_exists $M0/.trashcan/a/test1*
74096c
 
74096c
-# truncate from eliminate path [21-23]
74096c
+# truncate from eliminate path
74096c
 truncate -s 2 $M0/a/test2
74096c
 TEST [ -e $M0/a/test2 ]
74096c
 TEST [ `ls -l $M0/a/test2 | awk '{print $5}'` -eq 2 ]
74096c
 EXPECT "Y" wildcard_not_exists $M0/.trashcan/a/test2*
74096c
 
74096c
-# set internal op on [24-25]
74096c
+# set internal op on
74096c
 TEST $CLI volume set $V0 features.trash-internal-op on
74096c
 EXPECT 'on' volinfo_field $V0 'features.trash-internal-op'
74096c
 
74096c
-# again create two files and check [26]
74096c
+# again create two files and check
74096c
 create_files $M0/inop1 $M0/inop2
74096c
 TEST file_exists $V0 inop1 inop2
74096c
 
74096c
-# perform unlink [27]
74096c
+# perform unlink
74096c
 TEST unlink_op inop1
74096c
 
74096c
-# perform truncate [28]
74096c
+# perform truncate
74096c
 TEST truncate_op inop2 4
74096c
 
74096c
-# remove one brick and restart the volume [28-31]
74096c
+# remove one brick and restart the volume
74096c
 TEST $CLI volume remove-brick $V0 $H0:$B0/${V0}2 force
74096c
 EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
74096c
 TEST $CLI volume stop $V0
74096c
 TEST start_vol $V0 $M0 $M0/.trashcan
74096c
 
74096c
-# again create two files and check [33]
74096c
+# again create two files and check
74096c
 create_files $M0/rebal1 $M0/rebal2
74096c
 TEST file_exists $V0 rebal1 rebal2
74096c
 
74096c
-# add one brick [34-35]
74096c
+# add one brick
74096c
 TEST $CLI volume add-brick $V0 $H0:$B0/${V0}3
74096c
 TEST [ -d $B0/${V0}3 ]
74096c
 
74096c
 
74096c
-# perform rebalance [36]
74096c
+# perform rebalance
74096c
 TEST $CLI volume rebalance $V0 start force
74096c
 EXPECT_WITHIN $REBALANCE_TIMEOUT "0" rebalance_completed
74096c
 
74096c
 #Find out which file was migrated to the new brick
74096c
 file_name=$(ls $B0/${V0}3/rebal*| xargs basename)
74096c
 
74096c
-# check whether rebalance was succesful [37-40]
74096c
+# check whether rebalance was successful
74096c
 EXPECT "Y" wildcard_exists $B0/${V0}3/$file_name*
74096c
 EXPECT "Y" wildcard_exists $B0/${V0}1/.trashcan/internal_op/$file_name*
74096c
 
74096c
@@ -201,52 +201,42 @@ EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
74096c
 # force required in case rebalance is not over
74096c
 TEST $CLI volume stop $V0 force
74096c
 
74096c
-# create a replicated volume [41]
74096c
+# create a replicated volume
74096c
 TEST $CLI volume create $V1 replica 2 $H0:$B0/${V1}{1,2}
74096c
 
74096c
-# checking volume status [42-45]
74096c
+# checking volume status
74096c
 EXPECT "$V1" volinfo_field $V1 'Volume Name'
74096c
 EXPECT 'Replicate' volinfo_field $V1 'Type'
74096c
 EXPECT 'Created' volinfo_field $V1 'Status'
74096c
 EXPECT '2' brick_count $V1
74096c
 
74096c
-# enable trash with options and start the replicate volume by disabling automatic self-heal [46-50]
74096c
+# enable trash with options and start the replicate volume by disabling automatic self-heal
74096c
 TEST $CLI volume set $V1 features.trash on
74096c
 TEST $CLI volume set $V1 features.trash-internal-op on
74096c
 EXPECT 'on' volinfo_field $V1 'features.trash'
74096c
 EXPECT 'on' volinfo_field $V1 'features.trash-internal-op'
74096c
 TEST start_vol $V1 $M1 $M1/.trashcan
74096c
 
74096c
-# mount and check for trash directory [51]
74096c
+# mount and check for trash directory
74096c
 TEST [ -d $M1/.trashcan/internal_op ]
74096c
 
74096c
-# create a file and check [52]
74096c
+# create a file and check
74096c
 touch $M1/self
74096c
 TEST [ -e $B0/${V1}1/self -a -e $B0/${V1}2/self ]
74096c
 
74096c
-# kill one brick and delete the file from mount point [53-54]
74096c
+# kill one brick and delete the file from mount point
74096c
 kill_brick $V1 $H0 $B0/${V1}1
74096c
 EXPECT_WITHIN ${PROCESS_UP_TIMEOUT} "1" online_brick_count
74096c
 rm -f $M1/self
74096c
 EXPECT "Y" wildcard_exists $B0/${V1}2/.trashcan/self*
74096c
 
74096c
-# force start the volume and trigger the self-heal manually [55-57]
74096c
-TEST $CLI volume start $V1 force
74096c
-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "2" online_brick_count
74096c
-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
74096c
-# Since we created the file under root of the volume, it will be
74096c
-# healed automatically
74096c
-
74096c
-# check for the removed file in trashcan [58]
74096c
-EXPECT_WITHIN $HEAL_TIMEOUT "Y" wildcard_exists $B0/${V1}1/.trashcan/internal_op/self*
74096c
-
74096c
-# check renaming of trash directory through cli [59-62]
74096c
+# check renaming of trash directory through cli
74096c
 TEST $CLI volume set $V0 trash-dir abc
74096c
 TEST start_vol $V0 $M0 $M0/abc
74096c
 TEST [ -e $M0/abc -a ! -e $M0/.trashcan ]
74096c
 EXPECT "Y" wildcard_exists $B0/${V0}1/abc/internal_op/rebal*
74096c
 
74096c
-# ensure that rename and delete operation on trash directory fails [63-65]
74096c
+# ensure that rename and delete operation on trash directory fails
74096c
 rm -rf $M0/abc/internal_op
74096c
 TEST [ -e $M0/abc/internal_op ]
74096c
 rm -rf $M0/abc/
74096c
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
74096c
index 90b4f14..6f2da11 100644
74096c
--- a/xlators/cluster/afr/src/afr-common.c
74096c
+++ b/xlators/cluster/afr/src/afr-common.c
74096c
@@ -47,6 +47,41 @@ afr_quorum_errno(afr_private_t *priv)
74096c
     return ENOTCONN;
74096c
 }
74096c
 
74096c
+gf_boolean_t
74096c
+afr_is_private_directory(afr_private_t *priv, uuid_t pargfid, const char *name,
74096c
+                         pid_t pid)
74096c
+{
74096c
+    if (!__is_root_gfid(pargfid)) {
74096c
+        return _gf_false;
74096c
+    }
74096c
+
74096c
+    if (strcmp(name, GF_REPLICATE_TRASH_DIR) == 0) {
74096c
+        /*For backward compatibility /.landfill is private*/
74096c
+        return _gf_true;
74096c
+    }
74096c
+
74096c
+    if (pid == GF_CLIENT_PID_GSYNCD) {
74096c
+        /*geo-rep needs to create/sync private directory on slave because
74096c
+         * it appears in changelog*/
74096c
+        return _gf_false;
74096c
+    }
74096c
+
74096c
+    if (pid == GF_CLIENT_PID_GLFS_HEAL || pid == GF_CLIENT_PID_SELF_HEALD) {
74096c
+        if (strcmp(name, priv->anon_inode_name) == 0) {
74096c
+            /* anonymous-inode dir is private*/
74096c
+            return _gf_true;
74096c
+        }
74096c
+    } else {
74096c
+        if (strncmp(name, AFR_ANON_DIR_PREFIX, strlen(AFR_ANON_DIR_PREFIX)) ==
74096c
+            0) {
74096c
+            /* anonymous-inode dir prefix is private for geo-rep to work*/
74096c
+            return _gf_true;
74096c
+        }
74096c
+    }
74096c
+
74096c
+    return _gf_false;
74096c
+}
74096c
+
74096c
 int
74096c
 afr_fav_child_reset_sink_xattrs(void *opaque);
74096c
 
74096c
@@ -3301,11 +3336,10 @@ afr_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req)
74096c
         return 0;
74096c
     }
74096c
 
74096c
-    if (__is_root_gfid(loc->parent->gfid)) {
74096c
-        if (!strcmp(loc->name, GF_REPLICATE_TRASH_DIR)) {
74096c
-            op_errno = EPERM;
74096c
-            goto out;
74096c
-        }
74096c
+    if (afr_is_private_directory(this->private, loc->parent->gfid, loc->name,
74096c
+                                 frame->root->pid)) {
74096c
+        op_errno = EPERM;
74096c
+        goto out;
74096c
     }
74096c
 
74096c
     local = AFR_FRAME_INIT(frame, op_errno);
74096c
@@ -4832,6 +4866,7 @@ afr_priv_dump(xlator_t *this)
74096c
                        priv->background_self_heal_count);
74096c
     gf_proc_dump_write("healers", "%d", priv->healers);
74096c
     gf_proc_dump_write("read-hash-mode", "%d", priv->hash_mode);
74096c
+    gf_proc_dump_write("use-anonymous-inode", "%d", priv->use_anon_inode);
74096c
     if (priv->quorum_count == AFR_QUORUM_AUTO) {
74096c
         gf_proc_dump_write("quorum-type", "auto");
74096c
     } else if (priv->quorum_count == 0) {
74096c
@@ -5792,6 +5827,7 @@ afr_priv_destroy(afr_private_t *priv)
74096c
     GF_FREE(priv->local);
74096c
     GF_FREE(priv->pending_key);
74096c
     GF_FREE(priv->children);
74096c
+    GF_FREE(priv->anon_inode);
74096c
     GF_FREE(priv->child_up);
74096c
     GF_FREE(priv->child_latency);
74096c
     LOCK_DESTROY(&priv->lock);
74096c
diff --git a/xlators/cluster/afr/src/afr-dir-read.c b/xlators/cluster/afr/src/afr-dir-read.c
74096c
index 6307b63..d64b6a9 100644
74096c
--- a/xlators/cluster/afr/src/afr-dir-read.c
74096c
+++ b/xlators/cluster/afr/src/afr-dir-read.c
74096c
@@ -158,8 +158,8 @@ afr_validate_read_subvol(inode_t *inode, xlator_t *this, int par_read_subvol)
74096c
 }
74096c
 
74096c
 static void
74096c
-afr_readdir_transform_entries(gf_dirent_t *subvol_entries, int subvol,
74096c
-                              gf_dirent_t *entries, fd_t *fd)
74096c
+afr_readdir_transform_entries(call_frame_t *frame, gf_dirent_t *subvol_entries,
74096c
+                              int subvol, gf_dirent_t *entries, fd_t *fd)
74096c
 {
74096c
     int ret = -1;
74096c
     gf_dirent_t *entry = NULL;
74096c
@@ -177,8 +177,8 @@ afr_readdir_transform_entries(gf_dirent_t *subvol_entries, int subvol,
74096c
 
74096c
     list_for_each_entry_safe(entry, tmp, &subvol_entries->list, list)
74096c
     {
74096c
-        if (__is_root_gfid(fd->inode->gfid) &&
74096c
-            !strcmp(entry->d_name, GF_REPLICATE_TRASH_DIR)) {
74096c
+        if (afr_is_private_directory(priv, fd->inode->gfid, entry->d_name,
74096c
+                                     frame->root->pid)) {
74096c
             continue;
74096c
         }
74096c
 
74096c
@@ -222,8 +222,8 @@ afr_readdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
74096c
     }
74096c
 
74096c
     if (op_ret >= 0)
74096c
-        afr_readdir_transform_entries(subvol_entries, (long)cookie, &entries,
74096c
-                                      local->fd);
74096c
+        afr_readdir_transform_entries(frame, subvol_entries, (long)cookie,
74096c
+                                      &entries, local->fd);
74096c
 
74096c
     AFR_STACK_UNWIND(readdir, frame, op_ret, op_errno, &entries, xdata);
74096c
 
74096c
diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c
74096c
index 9b6575f..0a8a7fd 100644
74096c
--- a/xlators/cluster/afr/src/afr-self-heal-common.c
74096c
+++ b/xlators/cluster/afr/src/afr-self-heal-common.c
74096c
@@ -2753,3 +2753,185 @@ afr_choose_source_by_policy(afr_private_t *priv, unsigned char *sources,
74096c
 out:
74096c
     return source;
74096c
 }
74096c
+
74096c
+static int
74096c
+afr_anon_inode_mkdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
74096c
+                         int32_t op_ret, int32_t op_errno, inode_t *inode,
74096c
+                         struct iatt *buf, struct iatt *preparent,
74096c
+                         struct iatt *postparent, dict_t *xdata)
74096c
+{
74096c
+    afr_local_t *local = frame->local;
74096c
+    int i = (long)cookie;
74096c
+
74096c
+    local->replies[i].valid = 1;
74096c
+    local->replies[i].op_ret = op_ret;
74096c
+    local->replies[i].op_errno = op_errno;
74096c
+    if (op_ret == 0) {
74096c
+        local->op_ret = 0;
74096c
+        local->replies[i].poststat = *buf;
74096c
+        local->replies[i].preparent = *preparent;
74096c
+        local->replies[i].postparent = *postparent;
74096c
+    }
74096c
+    if (xdata) {
74096c
+        local->replies[i].xdata = dict_ref(xdata);
74096c
+    }
74096c
+
74096c
+    syncbarrier_wake(&local->barrier);
74096c
+    return 0;
74096c
+}
74096c
+
74096c
+int
74096c
+afr_anon_inode_create(xlator_t *this, int child, inode_t **linked_inode)
74096c
+{
74096c
+    call_frame_t *frame = NULL;
74096c
+    afr_local_t *local = NULL;
74096c
+    afr_private_t *priv = this->private;
74096c
+    unsigned char *mkdir_on = alloca0(priv->child_count);
74096c
+    unsigned char *lookup_on = alloca0(priv->child_count);
74096c
+    loc_t loc = {0};
74096c
+    int32_t op_errno = 0;
74096c
+    int32_t child_op_errno = 0;
74096c
+    struct iatt iatt = {0};
74096c
+    dict_t *xdata = NULL;
74096c
+    uuid_t anon_inode_gfid = {0};
74096c
+    int mkdir_count = 0;
74096c
+    int i = 0;
74096c
+
74096c
+    /*Try to mkdir everywhere and return success if the dir exists on 'child'
74096c
+     */
74096c
+
74096c
+    if (!priv->use_anon_inode) {
74096c
+        op_errno = EINVAL;
74096c
+        goto out;
74096c
+    }
74096c
+
74096c
+    frame = afr_frame_create(this, &op_errno);
74096c
+    if (op_errno) {
74096c
+        goto out;
74096c
+    }
74096c
+    local = frame->local;
74096c
+    if (!local->child_up[child]) {
74096c
+        /*Other bricks may need mkdir so don't error out yet*/
74096c
+        child_op_errno = ENOTCONN;
74096c
+    }
74096c
+    gf_uuid_parse(priv->anon_gfid_str, anon_inode_gfid);
74096c
+    for (i = 0; i < priv->child_count; i++) {
74096c
+        if (!local->child_up[i])
74096c
+            continue;
74096c
+
74096c
+        if (priv->anon_inode[i]) {
74096c
+            mkdir_on[i] = 0;
74096c
+        } else {
74096c
+            mkdir_on[i] = 1;
74096c
+            mkdir_count++;
74096c
+        }
74096c
+    }
74096c
+
74096c
+    if (mkdir_count == 0) {
74096c
+        *linked_inode = inode_find(this->itable, anon_inode_gfid);
74096c
+        if (*linked_inode) {
74096c
+            op_errno = 0;
74096c
+            goto out;
74096c
+        }
74096c
+    }
74096c
+
74096c
+    loc.parent = inode_ref(this->itable->root);
74096c
+    loc.name = priv->anon_inode_name;
74096c
+    loc.inode = inode_new(this->itable);
74096c
+    if (!loc.inode) {
74096c
+        op_errno = ENOMEM;
74096c
+        goto out;
74096c
+    }
74096c
+
74096c
+    xdata = dict_new();
74096c
+    if (!xdata) {
74096c
+        op_errno = ENOMEM;
74096c
+        goto out;
74096c
+    }
74096c
+
74096c
+    op_errno = -dict_set_gfuuid(xdata, "gfid-req", anon_inode_gfid, _gf_true);
74096c
+    if (op_errno) {
74096c
+        goto out;
74096c
+    }
74096c
+
74096c
+    if (mkdir_count == 0) {
74096c
+        memcpy(lookup_on, local->child_up, priv->child_count);
74096c
+        goto lookup;
74096c
+    }
74096c
+
74096c
+    AFR_ONLIST(mkdir_on, frame, afr_anon_inode_mkdir_cbk, mkdir, &loc, 0755, 0,
74096c
+               xdata);
74096c
+
74096c
+    for (i = 0; i < priv->child_count; i++) {
74096c
+        if (!mkdir_on[i]) {
74096c
+            continue;
74096c
+        }
74096c
+
74096c
+        if (local->replies[i].op_ret == 0) {
74096c
+            priv->anon_inode[i] = 1;
74096c
+            iatt = local->replies[i].poststat;
74096c
+        } else if (local->replies[i].op_ret < 0 &&
74096c
+                   local->replies[i].op_errno == EEXIST) {
74096c
+            lookup_on[i] = 1;
74096c
+        } else if (i == child) {
74096c
+            child_op_errno = local->replies[i].op_errno;
74096c
+        }
74096c
+    }
74096c
+
74096c
+    if (AFR_COUNT(lookup_on, priv->child_count) == 0) {
74096c
+        goto link;
74096c
+    }
74096c
+
74096c
+lookup:
74096c
+    AFR_ONLIST(lookup_on, frame, afr_selfheal_discover_cbk, lookup, &loc,
74096c
+               xdata);
74096c
+    for (i = 0; i < priv->child_count; i++) {
74096c
+        if (!lookup_on[i]) {
74096c
+            continue;
74096c
+        }
74096c
+
74096c
+        if (local->replies[i].op_ret == 0) {
74096c
+            if (gf_uuid_compare(anon_inode_gfid,
74096c
+                                local->replies[i].poststat.ia_gfid) == 0) {
74096c
+                priv->anon_inode[i] = 1;
74096c
+                iatt = local->replies[i].poststat;
74096c
+            } else {
74096c
+                if (i == child)
74096c
+                    child_op_errno = EINVAL;
74096c
+                gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_INVALID_DATA,
74096c
+                       "%s has gfid: %s", priv->anon_inode_name,
74096c
+                       uuid_utoa(local->replies[i].poststat.ia_gfid));
74096c
+            }
74096c
+        } else if (i == child) {
74096c
+            child_op_errno = local->replies[i].op_errno;
74096c
+        }
74096c
+    }
74096c
+link:
74096c
+    if (!gf_uuid_is_null(iatt.ia_gfid)) {
74096c
+        *linked_inode = inode_link(loc.inode, loc.parent, loc.name, &iatt);
74096c
+        if (*linked_inode) {
74096c
+            op_errno = 0;
74096c
+            inode_lookup(*linked_inode);
74096c
+        } else {
74096c
+            op_errno = ENOMEM;
74096c
+        }
74096c
+        goto out;
74096c
+    }
74096c
+
74096c
+out:
74096c
+    if (xdata)
74096c
+        dict_unref(xdata);
74096c
+    loc_wipe(&loc);
74096c
+    /*child_op_errno takes precedence*/
74096c
+    if (child_op_errno == 0) {
74096c
+        child_op_errno = op_errno;
74096c
+    }
74096c
+
74096c
+    if (child_op_errno && *linked_inode) {
74096c
+        inode_unref(*linked_inode);
74096c
+        *linked_inode = NULL;
74096c
+    }
74096c
+    if (frame)
74096c
+        AFR_STACK_DESTROY(frame);
74096c
+    return -child_op_errno;
74096c
+}
74096c
diff --git a/xlators/cluster/afr/src/afr-self-heal-entry.c b/xlators/cluster/afr/src/afr-self-heal-entry.c
74096c
index 00b5b2d..20b07dd 100644
74096c
--- a/xlators/cluster/afr/src/afr-self-heal-entry.c
74096c
+++ b/xlators/cluster/afr/src/afr-self-heal-entry.c
74096c
@@ -16,54 +16,170 @@
74096c
 #include <glusterfs/syncop-utils.h>
74096c
 #include <glusterfs/events.h>
74096c
 
74096c
-static int
74096c
-afr_selfheal_entry_delete(xlator_t *this, inode_t *dir, const char *name,
74096c
-                          inode_t *inode, int child, struct afr_reply *replies)
74096c
+int
74096c
+afr_selfheal_entry_anon_inode(xlator_t *this, inode_t *dir, const char *name,
74096c
+                              inode_t *inode, int child,
74096c
+                              struct afr_reply *replies,
74096c
+                              gf_boolean_t *anon_inode)
74096c
 {
74096c
     afr_private_t *priv = NULL;
74096c
+    afr_local_t *local = NULL;
74096c
     xlator_t *subvol = NULL;
74096c
     int ret = 0;
74096c
+    int i = 0;
74096c
+    char g[64] = {0};
74096c
+    unsigned char *lookup_success = NULL;
74096c
+    call_frame_t *frame = NULL;
74096c
+    loc_t loc2 = {
74096c
+        0,
74096c
+    };
74096c
     loc_t loc = {
74096c
         0,
74096c
     };
74096c
-    char g[64];
74096c
 
74096c
     priv = this->private;
74096c
-
74096c
     subvol = priv->children[child];
74096c
+    lookup_success = alloca0(priv->child_count);
74096c
+    uuid_utoa_r(replies[child].poststat.ia_gfid, g);
74096c
+    loc.inode = inode_new(inode->table);
74096c
+    if (!loc.inode) {
74096c
+        ret = -ENOMEM;
74096c
+        goto out;
74096c
+    }
74096c
+
74096c
+    if (replies[child].poststat.ia_type == IA_IFDIR) {
74096c
+        /* This directory may have sub-directory hierarchy which may need to
74096c
+         * be preserved for subsequent heals. So unconditionally move the
74096c
+         * directory to anonymous-inode directory*/
74096c
+        *anon_inode = _gf_true;
74096c
+        goto anon_inode;
74096c
+    }
74096c
+
74096c
+    frame = afr_frame_create(this, &ret);
74096c
+    if (!frame) {
74096c
+        ret = -ret;
74096c
+        goto out;
74096c
+    }
74096c
+    local = frame->local;
74096c
+    gf_uuid_copy(loc.gfid, replies[child].poststat.ia_gfid);
74096c
+    AFR_ONLIST(local->child_up, frame, afr_selfheal_discover_cbk, lookup, &loc,
74096c
+               NULL);
74096c
+    for (i = 0; i < priv->child_count; i++) {
74096c
+        if (local->replies[i].op_ret == 0) {
74096c
+            lookup_success[i] = 1;
74096c
+        } else if (local->replies[i].op_errno != ENOENT &&
74096c
+                   local->replies[i].op_errno != ESTALE) {
74096c
+            ret = -local->replies[i].op_errno;
74096c
+        }
74096c
+    }
74096c
+
74096c
+    if (priv->quorum_count) {
74096c
+        if (afr_has_quorum(lookup_success, this, NULL)) {
74096c
+            *anon_inode = _gf_true;
74096c
+        }
74096c
+    } else if (AFR_COUNT(lookup_success, priv->child_count) > 1) {
74096c
+        *anon_inode = _gf_true;
74096c
+    } else if (ret) {
74096c
+        goto out;
74096c
+    }
74096c
+
74096c
+anon_inode:
74096c
+    if (!*anon_inode) {
74096c
+        ret = 0;
74096c
+        goto out;
74096c
+    }
74096c
 
74096c
     loc.parent = inode_ref(dir);
74096c
     gf_uuid_copy(loc.pargfid, dir->gfid);
74096c
     loc.name = name;
74096c
-    loc.inode = inode_ref(inode);
74096c
 
74096c
-    if (replies[child].valid && replies[child].op_ret == 0) {
74096c
-        switch (replies[child].poststat.ia_type) {
74096c
-            case IA_IFDIR:
74096c
-                gf_msg(this->name, GF_LOG_WARNING, 0,
74096c
-                       AFR_MSG_EXPUNGING_FILE_OR_DIR,
74096c
-                       "expunging dir %s/%s (%s) on %s", uuid_utoa(dir->gfid),
74096c
-                       name, uuid_utoa_r(replies[child].poststat.ia_gfid, g),
74096c
-                       subvol->name);
74096c
-                ret = syncop_rmdir(subvol, &loc, 1, NULL, NULL);
74096c
-                break;
74096c
-            default:
74096c
-                gf_msg(this->name, GF_LOG_WARNING, 0,
74096c
-                       AFR_MSG_EXPUNGING_FILE_OR_DIR,
74096c
-                       "expunging file %s/%s (%s) on %s", uuid_utoa(dir->gfid),
74096c
-                       name, uuid_utoa_r(replies[child].poststat.ia_gfid, g),
74096c
-                       subvol->name);
74096c
-                ret = syncop_unlink(subvol, &loc, NULL, NULL);
74096c
-                break;
74096c
-        }
74096c
+    ret = afr_anon_inode_create(this, child, &loc2.parent);
74096c
+    if (ret < 0)
74096c
+        goto out;
74096c
+
74096c
+    loc2.name = g;
74096c
+    ret = syncop_rename(subvol, &loc, &loc2, NULL, NULL);
74096c
+    if (ret < 0) {
74096c
+        gf_msg(this->name, GF_LOG_WARNING, -ret, AFR_MSG_EXPUNGING_FILE_OR_DIR,
74096c
+               "Rename to %s dir %s/%s (%s) on %s failed",
74096c
+               priv->anon_inode_name, uuid_utoa(dir->gfid), name, g,
74096c
+               subvol->name);
74096c
+    } else {
74096c
+        gf_msg(this->name, GF_LOG_WARNING, 0, AFR_MSG_EXPUNGING_FILE_OR_DIR,
74096c
+               "Rename to %s dir %s/%s (%s) on %s successful",
74096c
+               priv->anon_inode_name, uuid_utoa(dir->gfid), name, g,
74096c
+               subvol->name);
74096c
     }
74096c
 
74096c
+out:
74096c
     loc_wipe(&loc);
74096c
+    loc_wipe(&loc2);
74096c
+    if (frame) {
74096c
+        AFR_STACK_DESTROY(frame);
74096c
+    }
74096c
 
74096c
     return ret;
74096c
 }
74096c
 
74096c
 int
74096c
+afr_selfheal_entry_delete(xlator_t *this, inode_t *dir, const char *name,
74096c
+                          inode_t *inode, int child, struct afr_reply *replies)
74096c
+{
74096c
+    char g[64] = {0};
74096c
+    afr_private_t *priv = NULL;
74096c
+    xlator_t *subvol = NULL;
74096c
+    int ret = 0;
74096c
+    loc_t loc = {
74096c
+        0,
74096c
+    };
74096c
+    gf_boolean_t anon_inode = _gf_false;
74096c
+
74096c
+    priv = this->private;
74096c
+    subvol = priv->children[child];
74096c
+
74096c
+    if ((!replies[child].valid) || (replies[child].op_ret < 0)) {
74096c
+        /*Nothing to do*/
74096c
+        ret = 0;
74096c
+        goto out;
74096c
+    }
74096c
+
74096c
+    if (priv->use_anon_inode) {
74096c
+        ret = afr_selfheal_entry_anon_inode(this, dir, name, inode, child,
74096c
+                                            replies, &anon_inode);
74096c
+        if (ret < 0 || anon_inode)
74096c
+            goto out;
74096c
+    }
74096c
+
74096c
+    loc.parent = inode_ref(dir);
74096c
+    loc.inode = inode_new(inode->table);
74096c
+    if (!loc.inode) {
74096c
+        ret = -ENOMEM;
74096c
+        goto out;
74096c
+    }
74096c
+    loc.name = name;
74096c
+    switch (replies[child].poststat.ia_type) {
74096c
+        case IA_IFDIR:
74096c
+            gf_msg(this->name, GF_LOG_WARNING, 0, AFR_MSG_EXPUNGING_FILE_OR_DIR,
74096c
+                   "expunging dir %s/%s (%s) on %s", uuid_utoa(dir->gfid), name,
74096c
+                   uuid_utoa_r(replies[child].poststat.ia_gfid, g),
74096c
+                   subvol->name);
74096c
+            ret = syncop_rmdir(subvol, &loc, 1, NULL, NULL);
74096c
+            break;
74096c
+        default:
74096c
+            gf_msg(this->name, GF_LOG_WARNING, 0, AFR_MSG_EXPUNGING_FILE_OR_DIR,
74096c
+                   "expunging file %s/%s (%s) on %s", uuid_utoa(dir->gfid),
74096c
+                   name, uuid_utoa_r(replies[child].poststat.ia_gfid, g),
74096c
+                   subvol->name);
74096c
+            ret = syncop_unlink(subvol, &loc, NULL, NULL);
74096c
+            break;
74096c
+    }
74096c
+
74096c
+out:
74096c
+    loc_wipe(&loc);
74096c
+    return ret;
74096c
+}
74096c
+
74096c
+int
74096c
 afr_selfheal_recreate_entry(call_frame_t *frame, int dst, int source,
74096c
                             unsigned char *sources, inode_t *dir,
74096c
                             const char *name, inode_t *inode,
74096c
@@ -76,6 +192,9 @@ afr_selfheal_recreate_entry(call_frame_t *frame, int dst, int source,
74096c
     loc_t srcloc = {
74096c
         0,
74096c
     };
74096c
+    loc_t anonloc = {
74096c
+        0,
74096c
+    };
74096c
     xlator_t *this = frame->this;
74096c
     afr_private_t *priv = NULL;
74096c
     dict_t *xdata = NULL;
74096c
@@ -86,15 +205,18 @@ afr_selfheal_recreate_entry(call_frame_t *frame, int dst, int source,
74096c
         0,
74096c
     };
74096c
     unsigned char *newentry = NULL;
74096c
+    char iatt_uuid_str[64] = {0};
74096c
+    char dir_uuid_str[64] = {0};
74096c
 
74096c
     priv = this->private;
74096c
     iatt = &replies[source].poststat;
74096c
+    uuid_utoa_r(iatt->ia_gfid, iatt_uuid_str);
74096c
     if (iatt->ia_type == IA_INVAL || gf_uuid_is_null(iatt->ia_gfid)) {
74096c
         gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_SELF_HEAL_FAILED,
74096c
                "Invalid ia_type (%d) or gfid(%s). source brick=%d, "
74096c
                "pargfid=%s, name=%s",
74096c
-               iatt->ia_type, uuid_utoa(iatt->ia_gfid), source,
74096c
-               uuid_utoa(dir->gfid), name);
74096c
+               iatt->ia_type, iatt_uuid_str, source,
74096c
+               uuid_utoa_r(dir->gfid, dir_uuid_str), name);
74096c
         ret = -EINVAL;
74096c
         goto out;
74096c
     }
74096c
@@ -119,14 +241,24 @@ afr_selfheal_recreate_entry(call_frame_t *frame, int dst, int source,
74096c
 
74096c
     srcloc.inode = inode_ref(inode);
74096c
     gf_uuid_copy(srcloc.gfid, iatt->ia_gfid);
74096c
-    if (iatt->ia_type != IA_IFDIR)
74096c
-        ret = syncop_lookup(priv->children[dst], &srcloc, 0, 0, 0, 0);
74096c
-    if (iatt->ia_type == IA_IFDIR || ret == -ENOENT || ret == -ESTALE) {
74096c
+    ret = syncop_lookup(priv->children[dst], &srcloc, 0, 0, 0, 0);
74096c
+    if (ret == -ENOENT || ret == -ESTALE) {
74096c
         newentry[dst] = 1;
74096c
         ret = afr_selfheal_newentry_mark(frame, this, inode, source, replies,
74096c
                                          sources, newentry);
74096c
         if (ret)
74096c
             goto out;
74096c
+    } else if (ret == 0 && iatt->ia_type == IA_IFDIR && priv->use_anon_inode) {
74096c
+        // Try rename from hidden directory
74096c
+        ret = afr_anon_inode_create(this, dst, &anonloc.parent);
74096c
+        if (ret < 0)
74096c
+            goto out;
74096c
+        anonloc.inode = inode_ref(inode);
74096c
+        anonloc.name = iatt_uuid_str;
74096c
+        ret = syncop_rename(priv->children[dst], &anonloc, &loc, NULL, NULL);
74096c
+        if (ret == -ENOENT || ret == -ESTALE)
74096c
+            ret = -1; /*This sets 'mismatch' to true*/
74096c
+        goto out;
74096c
     }
74096c
 
74096c
     mode = st_mode_from_ia(iatt->ia_prot, iatt->ia_type);
74096c
@@ -165,6 +297,7 @@ out:
74096c
     GF_FREE(linkname);
74096c
     loc_wipe(&loc);
74096c
     loc_wipe(&srcloc);
74096c
+    loc_wipe(&anonloc);
74096c
     return ret;
74096c
 }
74096c
 
74096c
@@ -580,6 +713,11 @@ afr_selfheal_entry_dirent(call_frame_t *frame, xlator_t *this, fd_t *fd,
74096c
 
74096c
     priv = this->private;
74096c
 
74096c
+    if (afr_is_private_directory(priv, fd->inode->gfid, name,
74096c
+                                 GF_CLIENT_PID_SELF_HEALD)) {
74096c
+        return 0;
74096c
+    }
74096c
+
74096c
     xattr = dict_new();
74096c
     if (!xattr)
74096c
         return -ENOMEM;
74096c
@@ -628,7 +766,7 @@ afr_selfheal_entry_dirent(call_frame_t *frame, xlator_t *this, fd_t *fd,
74096c
                                           replies);
74096c
 
74096c
         if ((ret == 0) && (priv->esh_granular) && parent_idx_inode) {
74096c
-            ret = afr_shd_index_purge(subvol, parent_idx_inode, name,
74096c
+            ret = afr_shd_entry_purge(subvol, parent_idx_inode, name,
74096c
                                       inode->ia_type);
74096c
             /* Why is ret force-set to 0? We do not care about
74096c
              * index purge failing for full heal as it is quite
74096c
@@ -758,10 +896,6 @@ afr_selfheal_entry_do_subvol(call_frame_t *frame, xlator_t *this, fd_t *fd,
74096c
             if (!strcmp(entry->d_name, ".") || !strcmp(entry->d_name, ".."))
74096c
                 continue;
74096c
 
74096c
-            if (__is_root_gfid(fd->inode->gfid) &&
74096c
-                !strcmp(entry->d_name, GF_REPLICATE_TRASH_DIR))
74096c
-                continue;
74096c
-
74096c
             ret = afr_selfheal_entry_dirent(iter_frame, this, fd, entry->d_name,
74096c
                                             loc.inode, subvol,
74096c
                                             local->need_full_crawl);
74096c
@@ -824,7 +958,7 @@ afr_selfheal_entry_granular_dirent(xlator_t *subvol, gf_dirent_t *entry,
74096c
         /* The name indices under the pgfid index dir are guaranteed
74096c
          * to be regular files. Hence the hardcoding.
74096c
          */
74096c
-        afr_shd_index_purge(subvol, parent->inode, entry->d_name, IA_IFREG);
74096c
+        afr_shd_entry_purge(subvol, parent->inode, entry->d_name, IA_IFREG);
74096c
         ret = 0;
74096c
         goto out;
74096c
     }
74096c
diff --git a/xlators/cluster/afr/src/afr-self-heal-name.c b/xlators/cluster/afr/src/afr-self-heal-name.c
74096c
index dace071..51e3d8c 100644
74096c
--- a/xlators/cluster/afr/src/afr-self-heal-name.c
74096c
+++ b/xlators/cluster/afr/src/afr-self-heal-name.c
74096c
@@ -98,21 +98,12 @@ __afr_selfheal_name_expunge(xlator_t *this, inode_t *parent, uuid_t pargfid,
74096c
                             const char *bname, inode_t *inode,
74096c
                             struct afr_reply *replies)
74096c
 {
74096c
-    loc_t loc = {
74096c
-        0,
74096c
-    };
74096c
     int i = 0;
74096c
     afr_private_t *priv = NULL;
74096c
-    char g[64];
74096c
     int ret = 0;
74096c
 
74096c
     priv = this->private;
74096c
 
74096c
-    loc.parent = inode_ref(parent);
74096c
-    gf_uuid_copy(loc.pargfid, pargfid);
74096c
-    loc.name = bname;
74096c
-    loc.inode = inode_ref(inode);
74096c
-
74096c
     for (i = 0; i < priv->child_count; i++) {
74096c
         if (!replies[i].valid)
74096c
             continue;
74096c
@@ -120,30 +111,10 @@ __afr_selfheal_name_expunge(xlator_t *this, inode_t *parent, uuid_t pargfid,
74096c
         if (replies[i].op_ret)
74096c
             continue;
74096c
 
74096c
-        switch (replies[i].poststat.ia_type) {
74096c
-            case IA_IFDIR:
74096c
-                gf_msg(this->name, GF_LOG_WARNING, 0,
74096c
-                       AFR_MSG_EXPUNGING_FILE_OR_DIR,
74096c
-                       "expunging dir %s/%s (%s) on %s", uuid_utoa(pargfid),
74096c
-                       bname, uuid_utoa_r(replies[i].poststat.ia_gfid, g),
74096c
-                       priv->children[i]->name);
74096c
-
74096c
-                ret |= syncop_rmdir(priv->children[i], &loc, 1, NULL, NULL);
74096c
-                break;
74096c
-            default:
74096c
-                gf_msg(this->name, GF_LOG_WARNING, 0,
74096c
-                       AFR_MSG_EXPUNGING_FILE_OR_DIR,
74096c
-                       "expunging file %s/%s (%s) on %s", uuid_utoa(pargfid),
74096c
-                       bname, uuid_utoa_r(replies[i].poststat.ia_gfid, g),
74096c
-                       priv->children[i]->name);
74096c
-
74096c
-                ret |= syncop_unlink(priv->children[i], &loc, NULL, NULL);
74096c
-                break;
74096c
-        }
74096c
+        ret |= afr_selfheal_entry_delete(this, parent, bname, inode, i,
74096c
+                                         replies);
74096c
     }
74096c
 
74096c
-    loc_wipe(&loc);
74096c
-
74096c
     return ret;
74096c
 }
74096c
 
74096c
diff --git a/xlators/cluster/afr/src/afr-self-heal.h b/xlators/cluster/afr/src/afr-self-heal.h
74096c
index 8f6fb00..c8dc384 100644
74096c
--- a/xlators/cluster/afr/src/afr-self-heal.h
74096c
+++ b/xlators/cluster/afr/src/afr-self-heal.h
74096c
@@ -370,4 +370,9 @@ gf_boolean_t
74096c
 afr_is_file_empty_on_all_children(afr_private_t *priv,
74096c
                                   struct afr_reply *replies);
74096c
 
74096c
+int
74096c
+afr_selfheal_entry_delete(xlator_t *this, inode_t *dir, const char *name,
74096c
+                          inode_t *inode, int child, struct afr_reply *replies);
74096c
+int
74096c
+afr_anon_inode_create(xlator_t *this, int child, inode_t **linked_inode);
74096c
 #endif /* !_AFR_SELFHEAL_H */
74096c
diff --git a/xlators/cluster/afr/src/afr-self-heald.c b/xlators/cluster/afr/src/afr-self-heald.c
74096c
index 95ac5f2..939a135 100644
74096c
--- a/xlators/cluster/afr/src/afr-self-heald.c
74096c
+++ b/xlators/cluster/afr/src/afr-self-heald.c
74096c
@@ -222,7 +222,7 @@ out:
74096c
 }
74096c
 
74096c
 int
74096c
-afr_shd_index_purge(xlator_t *subvol, inode_t *inode, char *name,
74096c
+afr_shd_entry_purge(xlator_t *subvol, inode_t *inode, char *name,
74096c
                     ia_type_t type)
74096c
 {
74096c
     int ret = 0;
74096c
@@ -422,7 +422,7 @@ afr_shd_index_heal(xlator_t *subvol, gf_dirent_t *entry, loc_t *parent,
74096c
     ret = afr_shd_selfheal(healer, healer->subvol, gfid);
74096c
 
74096c
     if (ret == -ENOENT || ret == -ESTALE)
74096c
-        afr_shd_index_purge(subvol, parent->inode, entry->d_name, val);
74096c
+        afr_shd_entry_purge(subvol, parent->inode, entry->d_name, val);
74096c
 
74096c
     if (ret == 2)
74096c
         /* If bricks crashed in pre-op after creating indices/xattrop
74096c
@@ -798,6 +798,176 @@ afr_bricks_available_for_heal(afr_private_t *priv)
74096c
     return _gf_true;
74096c
 }
74096c
 
74096c
+static int
74096c
+afr_shd_anon_inode_cleaner(xlator_t *subvol, gf_dirent_t *entry, loc_t *parent,
74096c
+                           void *data)
74096c
+{
74096c
+    struct subvol_healer *healer = data;
74096c
+    afr_private_t *priv = healer->this->private;
74096c
+    call_frame_t *frame = NULL;
74096c
+    afr_local_t *local = NULL;
74096c
+    int ret = 0;
74096c
+    loc_t loc = {0};
74096c
+    int count = 0;
74096c
+    int i = 0;
74096c
+    int op_errno = 0;
74096c
+    struct iatt *iatt = NULL;
74096c
+    gf_boolean_t multiple_links = _gf_false;
74096c
+    unsigned char *gfid_present = alloca0(priv->child_count);
74096c
+    unsigned char *entry_present = alloca0(priv->child_count);
74096c
+    char *type = "file";
74096c
+
74096c
+    frame = afr_frame_create(healer->this, &ret);
74096c
+    if (!frame) {
74096c
+        ret = -ret;
74096c
+        goto out;
74096c
+    }
74096c
+    local = frame->local;
74096c
+    if (AFR_COUNT(local->child_up, priv->child_count) != priv->child_count) {
74096c
+        gf_msg_debug(healer->this->name, 0,
74096c
+                     "Not all bricks are up. Skipping "
74096c
+                     "cleanup of %s on %s",
74096c
+                     entry->d_name, subvol->name);
74096c
+        ret = 0;
74096c
+        goto out;
74096c
+    }
74096c
+
74096c
+    loc.inode = inode_new(parent->inode->table);
74096c
+    if (!loc.inode) {
74096c
+        ret = -ENOMEM;
74096c
+        goto out;
74096c
+    }
74096c
+    ret = gf_uuid_parse(entry->d_name, loc.gfid);
74096c
+    if (ret) {
74096c
+        ret = 0;
74096c
+        goto out;
74096c
+    }
74096c
+    AFR_ONLIST(local->child_up, frame, afr_selfheal_discover_cbk, lookup, &loc,
74096c
+               NULL);
74096c
+    for (i = 0; i < priv->child_count; i++) {
74096c
+        if (local->replies[i].op_ret == 0) {
74096c
+            count++;
74096c
+            gfid_present[i] = 1;
74096c
+            iatt = &local->replies[i].poststat;
74096c
+            if (iatt->ia_type == IA_IFDIR) {
74096c
+                type = "dir";
74096c
+            }
74096c
+
74096c
+            if (i == healer->subvol) {
74096c
+                if (local->replies[i].poststat.ia_nlink > 1) {
74096c
+                    multiple_links = _gf_true;
74096c
+                }
74096c
+            }
74096c
+        } else if (local->replies[i].op_errno != ENOENT &&
74096c
+                   local->replies[i].op_errno != ESTALE) {
74096c
+            /*We don't have complete view. Skip the entry*/
74096c
+            gf_msg_debug(healer->this->name, local->replies[i].op_errno,
74096c
+                         "Skipping cleanup of %s on %s", entry->d_name,
74096c
+                         subvol->name);
74096c
+            ret = 0;
74096c
+            goto out;
74096c
+        }
74096c
+    }
74096c
+
74096c
+    /*Inode is deleted from subvol*/
74096c
+    if (count == 1 || (iatt->ia_type != IA_IFDIR && multiple_links)) {
74096c
+        gf_msg(healer->this->name, GF_LOG_WARNING, 0,
74096c
+               AFR_MSG_EXPUNGING_FILE_OR_DIR, "expunging %s %s/%s on %s", type,
74096c
+               priv->anon_inode_name, entry->d_name, subvol->name);
74096c
+        ret = afr_shd_entry_purge(subvol, parent->inode, entry->d_name,
74096c
+                                  iatt->ia_type);
74096c
+        if (ret == -ENOENT || ret == -ESTALE)
74096c
+            ret = 0;
74096c
+    } else if (count > 1) {
74096c
+        loc_wipe(&loc);
74096c
+        loc.parent = inode_ref(parent->inode);
74096c
+        loc.name = entry->d_name;
74096c
+        loc.inode = inode_new(parent->inode->table);
74096c
+        if (!loc.inode) {
74096c
+            ret = -ENOMEM;
74096c
+            goto out;
74096c
+        }
74096c
+        AFR_ONLIST(local->child_up, frame, afr_selfheal_discover_cbk, lookup,
74096c
+                   &loc, NULL);
74096c
+        count = 0;
74096c
+        for (i = 0; i < priv->child_count; i++) {
74096c
+            if (local->replies[i].op_ret == 0) {
74096c
+                count++;
74096c
+                entry_present[i] = 1;
74096c
+                iatt = &local->replies[i].poststat;
74096c
+            } else if (local->replies[i].op_errno != ENOENT &&
74096c
+                       local->replies[i].op_errno != ESTALE) {
74096c
+                /*We don't have complete view. Skip the entry*/
74096c
+                gf_msg_debug(healer->this->name, local->replies[i].op_errno,
74096c
+                             "Skipping cleanup of %s on %s", entry->d_name,
74096c
+                             subvol->name);
74096c
+                ret = 0;
74096c
+                goto out;
74096c
+            }
74096c
+        }
74096c
+        for (i = 0; i < priv->child_count; i++) {
74096c
+            if (gfid_present[i] && !entry_present[i]) {
74096c
+                /*Entry is not anonymous on at least one subvol*/
74096c
+                gf_msg_debug(healer->this->name, 0,
74096c
+                             "Valid entry present on %s "
74096c
+                             "Skipping cleanup of %s on %s",
74096c
+                             priv->children[i]->name, entry->d_name,
74096c
+                             subvol->name);
74096c
+                ret = 0;
74096c
+                goto out;
74096c
+            }
74096c
+        }
74096c
+
74096c
+        gf_msg(healer->this->name, GF_LOG_WARNING, 0,
74096c
+               AFR_MSG_EXPUNGING_FILE_OR_DIR,
74096c
+               "expunging %s %s/%s on all subvols", type, priv->anon_inode_name,
74096c
+               entry->d_name);
74096c
+        ret = 0;
74096c
+        for (i = 0; i < priv->child_count; i++) {
74096c
+            op_errno = -afr_shd_entry_purge(priv->children[i], loc.parent,
74096c
+                                            entry->d_name, iatt->ia_type);
74096c
+            if (op_errno != ENOENT && op_errno != ESTALE) {
74096c
+                ret |= -op_errno;
74096c
+            }
74096c
+        }
74096c
+    }
74096c
+
74096c
+out:
74096c
+    if (frame)
74096c
+        AFR_STACK_DESTROY(frame);
74096c
+    loc_wipe(&loc);
74096c
+    return ret;
74096c
+}
74096c
+
74096c
+static void
74096c
+afr_cleanup_anon_inode_dir(struct subvol_healer *healer)
74096c
+{
74096c
+    int ret = 0;
74096c
+    call_frame_t *frame = NULL;
74096c
+    afr_private_t *priv = healer->this->private;
74096c
+    loc_t loc = {0};
74096c
+
74096c
+    ret = afr_anon_inode_create(healer->this, healer->subvol, &loc.inode);
74096c
+    if (ret)
74096c
+        goto out;
74096c
+
74096c
+    frame = afr_frame_create(healer->this, &ret);
74096c
+    if (!frame) {
74096c
+        ret = -ret;
74096c
+        goto out;
74096c
+    }
74096c
+
74096c
+    ret = syncop_mt_dir_scan(frame, priv->children[healer->subvol], &loc,
74096c
+                             GF_CLIENT_PID_SELF_HEALD, healer,
74096c
+                             afr_shd_anon_inode_cleaner, NULL,
74096c
+                             priv->shd.max_threads, priv->shd.wait_qlength);
74096c
+out:
74096c
+    if (frame)
74096c
+        AFR_STACK_DESTROY(frame);
74096c
+    loc_wipe(&loc);
74096c
+    return;
74096c
+}
74096c
+
74096c
 void *
74096c
 afr_shd_index_healer(void *data)
74096c
 {
74096c
@@ -854,6 +1024,10 @@ afr_shd_index_healer(void *data)
74096c
             sleep(1);
74096c
         } while (ret > 0);
74096c
 
74096c
+        if (ret == 0) {
74096c
+            afr_cleanup_anon_inode_dir(healer);
74096c
+        }
74096c
+
74096c
         if (pre_crawl_xdata && !healer->crawl_event.heal_failed_count) {
74096c
             afr_shd_ta_check_and_unset_xattrs(this, &loc, healer,
74096c
                                               pre_crawl_xdata);
74096c
diff --git a/xlators/cluster/afr/src/afr-self-heald.h b/xlators/cluster/afr/src/afr-self-heald.h
74096c
index 1990539..acd567e 100644
74096c
--- a/xlators/cluster/afr/src/afr-self-heald.h
74096c
+++ b/xlators/cluster/afr/src/afr-self-heald.h
74096c
@@ -70,6 +70,6 @@ afr_shd_gfid_to_path(xlator_t *this, xlator_t *subvol, uuid_t gfid,
74096c
                      char **path_p);
74096c
 
74096c
 int
74096c
-afr_shd_index_purge(xlator_t *subvol, inode_t *inode, char *name,
74096c
+afr_shd_entry_purge(xlator_t *subvol, inode_t *inode, char *name,
74096c
                     ia_type_t type);
74096c
 #endif /* !_AFR_SELF_HEALD_H */
74096c
diff --git a/xlators/cluster/afr/src/afr.c b/xlators/cluster/afr/src/afr.c
74096c
index bfa464f..33fe4d8 100644
74096c
--- a/xlators/cluster/afr/src/afr.c
74096c
+++ b/xlators/cluster/afr/src/afr.c
74096c
@@ -135,6 +135,27 @@ set_data_self_heal_algorithm(afr_private_t *priv, char *algo)
74096c
     }
74096c
 }
74096c
 
74096c
+void
74096c
+afr_handle_anon_inode_options(afr_private_t *priv, dict_t *options)
74096c
+{
74096c
+    char *volfile_id_str = NULL;
74096c
+    uuid_t anon_inode_gfid = {0};
74096c
+
74096c
+    /*If volume id is not present don't enable anything*/
74096c
+    if (dict_get_str(options, "volume-id", &volfile_id_str))
74096c
+        return;
74096c
+    GF_ASSERT(strlen(AFR_ANON_DIR_PREFIX) + strlen(volfile_id_str) <= NAME_MAX);
74096c
+    /*anon_inode_name is not supposed to change once assigned*/
74096c
+    if (!priv->anon_inode_name[0]) {
74096c
+        snprintf(priv->anon_inode_name, sizeof(priv->anon_inode_name), "%s-%s",
74096c
+                 AFR_ANON_DIR_PREFIX, volfile_id_str);
74096c
+        gf_uuid_parse(volfile_id_str, anon_inode_gfid);
74096c
+        /*Flip a bit to make sure volfile-id and anon-gfid are not same*/
74096c
+        anon_inode_gfid[0] ^= 1;
74096c
+        uuid_utoa_r(anon_inode_gfid, priv->anon_gfid_str);
74096c
+    }
74096c
+}
74096c
+
74096c
 int
74096c
 reconfigure(xlator_t *this, dict_t *options)
74096c
 {
74096c
@@ -287,6 +308,10 @@ reconfigure(xlator_t *this, dict_t *options)
74096c
         consistent_io = _gf_false;
74096c
     priv->consistent_io = consistent_io;
74096c
 
74096c
+    afr_handle_anon_inode_options(priv, options);
74096c
+
74096c
+    GF_OPTION_RECONF("use-anonymous-inode", priv->use_anon_inode, options, bool,
74096c
+                     out);
74096c
     if (priv->shd.enabled) {
74096c
         if ((priv->shd.enabled != enabled_old) ||
74096c
             (timeout_old != priv->shd.timeout))
74096c
@@ -535,7 +560,9 @@ init(xlator_t *this)
74096c
 
74096c
     GF_OPTION_INIT("consistent-metadata", priv->consistent_metadata, bool, out);
74096c
     GF_OPTION_INIT("consistent-io", priv->consistent_io, bool, out);
74096c
+    afr_handle_anon_inode_options(priv, this->options);
74096c
 
74096c
+    GF_OPTION_INIT("use-anonymous-inode", priv->use_anon_inode, bool, out);
74096c
     if (priv->quorum_count != 0)
74096c
         priv->consistent_io = _gf_false;
74096c
 
74096c
@@ -547,13 +574,16 @@ init(xlator_t *this)
74096c
         goto out;
74096c
     }
74096c
 
74096c
+    priv->anon_inode = GF_CALLOC(sizeof(unsigned char), child_count,
74096c
+                                 gf_afr_mt_char);
74096c
+
74096c
     priv->child_up = GF_CALLOC(sizeof(unsigned char), child_count,
74096c
                                gf_afr_mt_char);
74096c
 
74096c
     priv->child_latency = GF_MALLOC(sizeof(*priv->child_latency) * child_count,
74096c
                                     gf_afr_mt_child_latency_t);
74096c
 
74096c
-    if (!priv->child_up || !priv->child_latency) {
74096c
+    if (!priv->child_up || !priv->child_latency || !priv->anon_inode) {
74096c
         ret = -ENOMEM;
74096c
         goto out;
74096c
     }
74096c
@@ -1218,6 +1248,14 @@ struct volume_options options[] = {
74096c
      .tags = {"replicate"},
74096c
      .description = "This option exists only for backward compatibility "
74096c
                     "and configuring it doesn't have any effect"},
74096c
+    {.key = {"use-anonymous-inode"},
74096c
+     .type = GF_OPTION_TYPE_BOOL,
74096c
+     .default_value = "no",
74096c
+     .op_version = {GD_OP_VERSION_7_0},
74096c
+     .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE,
74096c
+     .tags = {"replicate"},
74096c
+     .description = "Setting this option heals directory renames efficiently"},
74096c
+
74096c
     {.key = {NULL}},
74096c
 };
74096c
 
74096c
diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h
74096c
index 3a2b26d..6a9a763 100644
74096c
--- a/xlators/cluster/afr/src/afr.h
74096c
+++ b/xlators/cluster/afr/src/afr.h
74096c
@@ -40,6 +40,8 @@
74096c
 #define AFR_TA_DOM_MODIFY "afr.ta.dom-modify"
74096c
 
74096c
 #define AFR_HALO_MAX_LATENCY 99999
74096c
+#define AFR_ANON_DIR_PREFIX ".glusterfs-anonymous-inode"
74096c
+
74096c
 
74096c
 #define PFLAG_PENDING (1 << 0)
74096c
 #define PFLAG_SBRAIN (1 << 1)
74096c
@@ -155,6 +157,7 @@ typedef struct _afr_private {
74096c
     struct list_head ta_waitq;
74096c
     struct list_head ta_onwireq;
74096c
 
74096c
+    unsigned char *anon_inode;
74096c
     unsigned char *child_up;
74096c
     int64_t *child_latency;
74096c
     unsigned char *local;
74096c
@@ -240,6 +243,11 @@ typedef struct _afr_private {
74096c
     gf_boolean_t esh_granular;
74096c
     gf_boolean_t consistent_io;
74096c
     gf_boolean_t data_self_heal; /* on/off */
74096c
+    gf_boolean_t use_anon_inode;
74096c
+
74096c
+    /*For anon-inode handling */
74096c
+    char anon_inode_name[NAME_MAX + 1];
74096c
+    char anon_gfid_str[UUID_SIZE + 1];
74096c
 } afr_private_t;
74096c
 
74096c
 typedef enum {
74096c
@@ -1341,4 +1349,7 @@ afr_selfheal_childup(xlator_t *this, afr_private_t *priv);
74096c
 void
74096c
 afr_fill_success_replies(afr_local_t *local, afr_private_t *priv,
74096c
                          unsigned char *replies);
74096c
+gf_boolean_t
74096c
+afr_is_private_directory(afr_private_t *priv, uuid_t pargfid, const char *name,
74096c
+                         pid_t pid);
74096c
 #endif /* __AFR_H__ */
74096c
diff --git a/xlators/mgmt/glusterd/src/glusterd-volgen.c b/xlators/mgmt/glusterd/src/glusterd-volgen.c
74096c
index 094a71f..1920284 100644
74096c
--- a/xlators/mgmt/glusterd/src/glusterd-volgen.c
74096c
+++ b/xlators/mgmt/glusterd/src/glusterd-volgen.c
74096c
@@ -3867,6 +3867,38 @@ out:
74096c
 }
74096c
 
74096c
 static int
74096c
+set_volfile_id_option(volgen_graph_t *graph, glusterd_volinfo_t *volinfo,
74096c
+                      int clusters)
74096c
+{
74096c
+    xlator_t *xlator = NULL;
74096c
+    int i = 0;
74096c
+    int ret = -1;
74096c
+    glusterd_conf_t *conf = NULL;
74096c
+    xlator_t *this = NULL;
74096c
+
74096c
+    this = THIS;
74096c
+    GF_VALIDATE_OR_GOTO("glusterd", this, out);
74096c
+    conf = this->private;
74096c
+    GF_VALIDATE_OR_GOTO(this->name, conf, out);
74096c
+
74096c
+    if (conf->op_version < GD_OP_VERSION_7_1)
74096c
+        return 0;
74096c
+    xlator = first_of(graph);
74096c
+
74096c
+    for (i = 0; i < clusters; i++) {
74096c
+        ret = xlator_set_fixed_option(xlator, "volume-id",
74096c
+                                      uuid_utoa(volinfo->volume_id));
74096c
+        if (ret)
74096c
+            goto out;
74096c
+
74096c
+        xlator = xlator->next;
74096c
+    }
74096c
+
74096c
+out:
74096c
+    return ret;
74096c
+}
74096c
+
74096c
+static int
74096c
 volgen_graph_build_afr_clusters(volgen_graph_t *graph,
74096c
                                 glusterd_volinfo_t *volinfo)
74096c
 {
74096c
@@ -3906,6 +3938,13 @@ volgen_graph_build_afr_clusters(volgen_graph_t *graph,
74096c
         clusters = -1;
74096c
         goto out;
74096c
     }
74096c
+
74096c
+    ret = set_volfile_id_option(graph, volinfo, clusters);
74096c
+    if (ret) {
74096c
+        clusters = -1;
74096c
+        goto out;
74096c
+    }
74096c
+
74096c
     if (!volinfo->arbiter_count)
74096c
         goto out;
74096c
 
74096c
diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
74096c
index 62acadf..c1ca190 100644
74096c
--- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c
74096c
+++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
74096c
@@ -3789,4 +3789,10 @@ struct volopt_map_entry glusterd_volopt_map[] = {
74096c
      .voltype = "features/cloudsync",
74096c
      .op_version = GD_OP_VERSION_7_0,
74096c
      .flags = VOLOPT_FLAG_CLIENT_OPT},
74096c
+
74096c
+    {.key = "cluster.use-anonymous-inode",
74096c
+     .voltype = "cluster/replicate",
74096c
+     .op_version = GD_OP_VERSION_7_1,
74096c
+     .value = "yes",
74096c
+     .flags = VOLOPT_FLAG_CLIENT_OPT},
74096c
     {.key = NULL}};
74096c
-- 
74096c
1.8.3.1
74096c