Blob Blame History Raw
From 93ef66173442aaf4aeaeb161c6d6108eda54014a Mon Sep 17 00:00:00 2001
From: Krutika Dhananjay <kdhananj@redhat.com>
Date: Thu, 12 Apr 2018 15:47:00 +0530
Subject: [PATCH 430/444] features/shard: Perform shards deletion in the
 background

> Upstream: https://review.gluster.org/19970
> BUG: 1568521
> Change-Id: Ia83117230c9dd7d0d9cae05235644f8475e97bc3

A synctask is created that scans the indices from
.shard/.remove_me and deletes the shards associated with the
gfid corresponding to each index bname. The rate of deletion
is controlled by the option features.shard-deletion-rate,
whose default value is 100.
The task is launched on two occasions:
1. when shard receives its first-ever lookup on the volume
2. when a rename or unlink deletes an inode

Change-Id: Ia83117230c9dd7d0d9cae05235644f8475e97bc3
BUG: 1520882
Signed-off-by: Krutika Dhananjay <kdhananj@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/154864
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Xavi Hernandez <xhernandez@redhat.com>
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
---
 libglusterfs/src/globals.h                      |   1 +
 tests/bugs/shard/bug-1568521-EEXIST.t           |  30 +-
 tests/bugs/shard/bug-1568521.t                  |  53 ++
 tests/bugs/shard/bug-shard-discard.t            |  19 +-
 tests/bugs/shard/shard-inode-refcount-test.t    |   5 +-
 tests/bugs/shard/unlinks-and-renames.t          | 123 ++--
 xlators/features/shard/src/shard-messages.h     |  18 +-
 xlators/features/shard/src/shard.c              | 816 +++++++++++++++++++-----
 xlators/features/shard/src/shard.h              |  19 +-
 xlators/mgmt/glusterd/src/glusterd-volume-set.c |   5 +
 10 files changed, 829 insertions(+), 260 deletions(-)
 create mode 100644 tests/bugs/shard/bug-1568521.t

diff --git a/libglusterfs/src/globals.h b/libglusterfs/src/globals.h
index 8e218cb..699e73e 100644
--- a/libglusterfs/src/globals.h
+++ b/libglusterfs/src/globals.h
@@ -109,6 +109,7 @@
 
 #define GD_OP_VERSION_3_13_2   31302 /* Op-version for GlusterFS 3.13.2 */
 
+#define GD_OP_VERSION_4_2_0    40200 /* Op-version for GlusterFs  4.2.0 */
 
 /* Downstream only change */
 #define GD_OP_VERSION_3_11_2   31102 /* Op-version for RHGS 3.3.1-async */
diff --git a/tests/bugs/shard/bug-1568521-EEXIST.t b/tests/bugs/shard/bug-1568521-EEXIST.t
index e4c3d41..7de400d 100644
--- a/tests/bugs/shard/bug-1568521-EEXIST.t
+++ b/tests/bugs/shard/bug-1568521-EEXIST.t
@@ -5,6 +5,12 @@
 
 cleanup
 
+function get_file_count {
+    ls $1* | wc -l
+}
+
+FILE_COUNT_TIME=5
+
 TEST glusterd
 TEST pidof glusterd
 TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
@@ -41,10 +47,14 @@ TEST setfattr -n trusted.glusterfs.shard.file-size -v 0x000000000050000000000000
 sleep 2
 
 TEST unlink $M0/dir/file
-EXPECT "0000000000400000" get_hex_xattr trusted.glusterfs.shard.block-size $B0/${V0}0/.shard/.remove_me/$gfid_file
-EXPECT "0000000000400000" get_hex_xattr trusted.glusterfs.shard.block-size $B0/${V0}1/.shard/.remove_me/$gfid_file
-EXPECT "0000000000900000000000000000000000000000000000000000000000000000" get_hex_xattr trusted.glusterfs.shard.file-size $B0/${V0}0/.shard/.remove_me/$gfid_file
-EXPECT "0000000000900000000000000000000000000000000000000000000000000000" get_hex_xattr trusted.glusterfs.shard.file-size $B0/${V0}1/.shard/.remove_me/$gfid_file
+
+TEST ! stat $B0/${V0}0/dir/file
+TEST ! stat $B0/${V0}1/dir/file
+
+EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}0/.shard/.remove_me/$gfid_file
+EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}1/.shard/.remove_me/$gfid_file
+EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}0/.shard/$gfid_file
+EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}1/.shard/$gfid_file
 
 ##############################
 ### Repeat test for rename ###
@@ -71,9 +81,13 @@ TEST setfattr -n trusted.glusterfs.shard.file-size -v 0x000000000050000000000000
 sleep 2
 
 TEST mv -f $M0/src $M0/dir/dst
-EXPECT "0000000000400000" get_hex_xattr trusted.glusterfs.shard.block-size $B0/${V0}0/.shard/.remove_me/$gfid_dst
-EXPECT "0000000000400000" get_hex_xattr trusted.glusterfs.shard.block-size $B0/${V0}1/.shard/.remove_me/$gfid_dst
-EXPECT "0000000000900000000000000000000000000000000000000000000000000000" get_hex_xattr trusted.glusterfs.shard.file-size $B0/${V0}0/.shard/.remove_me/$gfid_dst
-EXPECT "0000000000900000000000000000000000000000000000000000000000000000" get_hex_xattr trusted.glusterfs.shard.file-size $B0/${V0}1/.shard/.remove_me/$gfid_dst
+
+TEST ! stat $B0/${V0}0/src
+TEST ! stat $B0/${V0}1/src
+
+EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}0/.shard/.remove_me/$gfid_dst
+EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}1/.shard/.remove_me/$gfid_dst
+EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}0/.shard/$gfid_dst
+EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}1/.shard/$gfid_dst
 
 cleanup
diff --git a/tests/bugs/shard/bug-1568521.t b/tests/bugs/shard/bug-1568521.t
new file mode 100644
index 0000000..167fb63
--- /dev/null
+++ b/tests/bugs/shard/bug-1568521.t
@@ -0,0 +1,53 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+
+
+function delete_files {
+        local mountpoint=$1;
+        local success=0;
+        local value=$2
+        for i in {1..500}; do
+                unlink $mountpoint/file-$i 2>/dev/null 1>/dev/null
+                if [ $? -eq 0 ]; then
+                        echo $2 >> $B0/output.txt
+                fi
+        done
+        echo $success
+}
+
+cleanup
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+TEST $CLI volume set $V0 features.shard on
+TEST $CLI volume set $V0 shard-block-size 4MB
+TEST $CLI volume start $V0
+
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M1
+
+for i in {1..500}; do
+        dd if=/dev/urandom of=$M0/file-$i bs=1M count=2
+done
+
+for i in {1..500}; do
+        stat $M1/file-$i > /dev/null
+done
+
+delete_files $M0 0 &
+delete_files $M1 1 &
+wait
+
+success1=$(grep 0 $B0/output.txt | wc -l);
+success2=$(grep 1 $B0/output.txt | wc -l);
+
+echo "Success1 is $success1";
+echo "Success2 is $success2";
+
+success_total=$((success1 + success2));
+
+EXPECT 500 echo $success_total
+
+cleanup
diff --git a/tests/bugs/shard/bug-shard-discard.t b/tests/bugs/shard/bug-shard-discard.t
index 884d9e7..910ade1 100644
--- a/tests/bugs/shard/bug-shard-discard.t
+++ b/tests/bugs/shard/bug-shard-discard.t
@@ -5,6 +5,12 @@
 
 cleanup
 
+FILE_COUNT_TIME=5
+
+function get_shard_count {
+    ls $1/$2.* | wc -l
+}
+
 TEST glusterd
 TEST pidof glusterd
 TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0..3}
@@ -42,14 +48,11 @@ EXPECT_NOT "1" file_all_zeroes `find $B0 -name $gfid_foo.1`
 
 # Now unlink the file. And ensure that all shards associated with the file are cleaned up
 TEST unlink $M0/foo
-#TEST ! stat $B0/${V0}0/.shard/$gfid_foo.1
-#TEST ! stat $B0/${V0}1/.shard/$gfid_foo.1
-#TEST ! stat $B0/${V0}2/.shard/$gfid_foo.1
-#TEST ! stat $B0/${V0}3/.shard/$gfid_foo.1
-#TEST ! stat $B0/${V0}0/.shard/$gfid_foo.2
-#TEST ! stat $B0/${V0}1/.shard/$gfid_foo.2
-#TEST ! stat $B0/${V0}2/.shard/$gfid_foo.2
-#TEST ! stat $B0/${V0}3/.shard/$gfid_foo.2
+
+EXPECT_WITHIN $FILE_COUNT_TIME 0 get_shard_count $B0/${V0}0/.shard $gfid_foo
+EXPECT_WITHIN $FILE_COUNT_TIME 0 get_shard_count $B0/${V0}1/.shard $gfid_foo
+EXPECT_WITHIN $FILE_COUNT_TIME 0 get_shard_count $B0/${V0}2/.shard $gfid_foo
+EXPECT_WITHIN $FILE_COUNT_TIME 0 get_shard_count $B0/${V0}3/.shard $gfid_foo
 TEST ! stat $M0/foo
 
 #clean up everything
diff --git a/tests/bugs/shard/shard-inode-refcount-test.t b/tests/bugs/shard/shard-inode-refcount-test.t
index c92dc07..087c8ba 100644
--- a/tests/bugs/shard/shard-inode-refcount-test.t
+++ b/tests/bugs/shard/shard-inode-refcount-test.t
@@ -5,6 +5,8 @@
 
 cleanup
 
+SHARD_COUNT_TIME=5
+
 TEST glusterd
 TEST pidof glusterd
 TEST $CLI volume create $V0 $H0:$B0/${V0}0
@@ -18,7 +20,8 @@ TEST dd if=/dev/zero conv=fsync of=$M0/one-plus-five-shards bs=1M count=23
 
 ACTIVE_INODES_BEFORE=$(get_mount_active_size_value $V0)
 TEST rm -f $M0/one-plus-five-shards
-#EXPECT `expr $ACTIVE_INODES_BEFORE - 4` get_mount_active_size_value $V0
+# Expect 5 inodes less. But one inode more than before because .remove_me would be created.
+EXPECT_WITHIN $SHARD_COUNT_TIME `expr $ACTIVE_INODES_BEFORE - 5 + 1` get_mount_active_size_value $V0
 
 EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
 TEST $CLI volume stop $V0
diff --git a/tests/bugs/shard/unlinks-and-renames.t b/tests/bugs/shard/unlinks-and-renames.t
index 997c397..6e5164f 100644
--- a/tests/bugs/shard/unlinks-and-renames.t
+++ b/tests/bugs/shard/unlinks-and-renames.t
@@ -9,6 +9,12 @@ cleanup
 # and rename fops in sharding and make sure they work fine.
 #
 
+FILE_COUNT_TIME=5
+
+function get_file_count {
+    ls $1* | wc -l
+}
+
 #################################################
 ################### UNLINK ######################
 #################################################
@@ -36,13 +42,8 @@ gfid_foo=$(get_gfid_string $M0/dir/foo)
 TEST unlink $M0/dir/foo
 TEST stat $B0/${V0}0/.shard/.remove_me
 TEST stat $B0/${V0}1/.shard/.remove_me
-TEST stat $B0/${V0}0/.shard/.remove_me/$gfid_foo
-TEST stat $B0/${V0}1/.shard/.remove_me/$gfid_foo
-
-EXPECT "0000000000400000" get_hex_xattr trusted.glusterfs.shard.block-size $B0/${V0}0/.shard/.remove_me/$gfid_foo
-EXPECT "0000000000400000" get_hex_xattr trusted.glusterfs.shard.block-size $B0/${V0}1/.shard/.remove_me/$gfid_foo
-EXPECT "0000000000500000000000000000000000000000000000000000000000000000" get_hex_xattr trusted.glusterfs.shard.file-size $B0/${V0}0/.shard/.remove_me/$gfid_foo
-EXPECT "0000000000500000000000000000000000000000000000000000000000000000" get_hex_xattr trusted.glusterfs.shard.file-size $B0/${V0}1/.shard/.remove_me/$gfid_foo
+EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}0/.shard/.remove_me/$gfid_foo
+EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}1/.shard/.remove_me/$gfid_foo
 
 ##################################################
 ##### Unlink of a sharded file without holes #####
@@ -56,20 +57,14 @@ TEST stat $B0/${V0}1/.shard/$gfid_new.1
 TEST stat $B0/${V0}0/.shard/$gfid_new.2
 TEST stat $B0/${V0}1/.shard/$gfid_new.2
 TEST unlink $M0/dir/new
-#TEST ! stat $B0/${V0}0/.shard/$gfid_new.1
-#TEST ! stat $B0/${V0}1/.shard/$gfid_new.1
-#TEST ! stat $B0/${V0}0/.shard/$gfid_new.2
-#TEST ! stat $B0/${V0}1/.shard/$gfid_new.2
+EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}0/.shard/$gfid_new
+EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}1/.shard/$gfid_new
 TEST ! stat $M0/dir/new
 TEST ! stat $B0/${V0}0/dir/new
 TEST ! stat $B0/${V0}1/dir/new
-TEST stat $B0/${V0}0/.shard/.remove_me/$gfid_new
-TEST stat $B0/${V0}1/.shard/.remove_me/$gfid_new
+EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}0/.shard/.remove_me/$gfid_new
+EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}1/.shard/.remove_me/$gfid_new
 
-EXPECT "0000000000400000" get_hex_xattr trusted.glusterfs.shard.block-size $B0/${V0}0/.shard/.remove_me/$gfid_new
-EXPECT "0000000000400000" get_hex_xattr trusted.glusterfs.shard.block-size $B0/${V0}1/.shard/.remove_me/$gfid_new
-EXPECT "0000000000900000000000000000000000000000000000000000000000000000" get_hex_xattr trusted.glusterfs.shard.file-size $B0/${V0}0/.shard/.remove_me/$gfid_new
-EXPECT "0000000000900000000000000000000000000000000000000000000000000000" get_hex_xattr trusted.glusterfs.shard.file-size $B0/${V0}1/.shard/.remove_me/$gfid_new
 #######################################
 ##### Unlink with /.shard present #####
 #######################################
@@ -83,13 +78,8 @@ TEST unlink $M0/dir/foo
 TEST ! stat $B0/${V0}0/dir/foo
 TEST ! stat $B0/${V0}1/dir/foo
 TEST ! stat $M0/dir/foo
-TEST stat $B0/${V0}0/.shard/.remove_me/$gfid_foo
-TEST stat $B0/${V0}1/.shard/.remove_me/$gfid_foo
-
-EXPECT "0000000000400000" get_hex_xattr trusted.glusterfs.shard.block-size $B0/${V0}0/.shard/.remove_me/$gfid_foo
-EXPECT "0000000000400000" get_hex_xattr trusted.glusterfs.shard.block-size $B0/${V0}1/.shard/.remove_me/$gfid_foo
-EXPECT "0000000000500000000000000000000000000000000000000000000000000000" get_hex_xattr trusted.glusterfs.shard.file-size $B0/${V0}0/.shard/.remove_me/$gfid_foo
-EXPECT "0000000000500000000000000000000000000000000000000000000000000000" get_hex_xattr trusted.glusterfs.shard.file-size $B0/${V0}1/.shard/.remove_me/$gfid_foo
+EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}0/.shard/.remove_me/$gfid_foo
+EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}1/.shard/.remove_me/$gfid_foo
 
 #############################################################
 ##### Unlink of a file with only one block (the zeroth) #####
@@ -102,13 +92,9 @@ TEST unlink $M0/dir/foo
 TEST ! stat $B0/${V0}0/dir/foo
 TEST ! stat $B0/${V0}1/dir/foo
 TEST ! stat $M0/dir/foo
-TEST stat $B0/${V0}0/.shard/.remove_me/$gfid_foo
-TEST stat $B0/${V0}1/.shard/.remove_me/$gfid_foo
+EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}0/.shard/.remove_me/$gfid_foo
+EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}1/.shard/.remove_me/$gfid_foo
 
-EXPECT "0000000000400000" get_hex_xattr trusted.glusterfs.shard.block-size $B0/${V0}0/.shard/.remove_me/$gfid_foo
-EXPECT "0000000000400000" get_hex_xattr trusted.glusterfs.shard.block-size $B0/${V0}1/.shard/.remove_me/$gfid_foo
-EXPECT "0000000000100000000000000000000000000000000000000000000000000000" get_hex_xattr trusted.glusterfs.shard.file-size $B0/${V0}0/.shard/.remove_me/$gfid_foo
-EXPECT "0000000000100000000000000000000000000000000000000000000000000000" get_hex_xattr trusted.glusterfs.shard.file-size $B0/${V0}1/.shard/.remove_me/$gfid_foo
 ####################################################
 ##### Unlink of a sharded file with hard-links #####
 ####################################################
@@ -137,22 +123,15 @@ TEST stat $B0/${V0}0/link
 TEST stat $B0/${V0}1/link
 # Now delete the last link.
 TEST unlink $M0/link
-TEST stat $B0/${V0}0/.shard/.remove_me/$gfid_original
-TEST stat $B0/${V0}1/.shard/.remove_me/$gfid_original
+EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}0/.shard/.remove_me/$gfid_original
+EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}1/.shard/.remove_me/$gfid_original
 # Ensure that the shards are all cleaned up.
-#TEST ! stat $B0/${V0}0/.shard/$gfid_original.1
-#TEST ! stat $B0/${V0}1/.shard/$gfid_original.1
-#TEST ! stat $B0/${V0}0/.shard/$gfid_original.2
-#TEST ! stat $B0/${V0}1/.shard/$gfid_original.2
-#TEST ! stat $M0/link
+EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}0/.shard/$gfid_original
+EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}1/.shard/$gfid_original
+TEST ! stat $M0/link
 TEST ! stat $B0/${V0}0/link
 TEST ! stat $B0/${V0}1/link
 
-EXPECT "0000000000400000" get_hex_xattr trusted.glusterfs.shard.block-size $B0/${V0}0/.shard/.remove_me/$gfid_original
-EXPECT "0000000000400000" get_hex_xattr trusted.glusterfs.shard.block-size $B0/${V0}1/.shard/.remove_me/$gfid_original
-EXPECT "0000000000900000000000000000000000000000000000000000000000000000" get_hex_xattr trusted.glusterfs.shard.file-size $B0/${V0}0/.shard/.remove_me/$gfid_original
-EXPECT "0000000000900000000000000000000000000000000000000000000000000000" get_hex_xattr trusted.glusterfs.shard.file-size $B0/${V0}1/.shard/.remove_me/$gfid_original
-
 EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
 TEST $CLI volume stop $V0
 TEST $CLI volume delete $V0
@@ -190,13 +169,8 @@ TEST ! stat $B0/${V0}0/dir/src
 TEST ! stat $B0/${V0}1/dir/src
 TEST   stat $B0/${V0}0/dir/dst
 TEST   stat $B0/${V0}1/dir/dst
-TEST stat $B0/${V0}0/.shard/.remove_me/$gfid_dst
-TEST stat $B0/${V0}1/.shard/.remove_me/$gfid_dst
-
-EXPECT "0000000000400000" get_hex_xattr trusted.glusterfs.shard.block-size $B0/${V0}0/.shard/.remove_me/$gfid_dst
-EXPECT "0000000000400000" get_hex_xattr trusted.glusterfs.shard.block-size $B0/${V0}1/.shard/.remove_me/$gfid_dst
-EXPECT "0000000000500000000000000000000000000000000000000000000000000000" get_hex_xattr trusted.glusterfs.shard.file-size $B0/${V0}0/.shard/.remove_me/$gfid_dst
-EXPECT "0000000000500000000000000000000000000000000000000000000000000000" get_hex_xattr trusted.glusterfs.shard.file-size $B0/${V0}1/.shard/.remove_me/$gfid_dst
+EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}0/.shard/.remove_me/$gfid_dst
+EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}1/.shard/.remove_me/$gfid_dst
 
 ##################################################
 ##### Rename to a sharded file without holes #####
@@ -212,23 +186,16 @@ TEST stat $B0/${V0}1/.shard/$gfid_dst.1
 TEST stat $B0/${V0}0/.shard/$gfid_dst.2
 TEST stat $B0/${V0}1/.shard/$gfid_dst.2
 TEST mv -f $M0/dir/src $M0/dir/dst
-#TEST ! stat $B0/${V0}0/.shard/$gfid_dst.1
-#TEST ! stat $B0/${V0}1/.shard/$gfid_dst.1
-#TEST ! stat $B0/${V0}0/.shard/$gfid_dst.2
-#TEST ! stat $B0/${V0}1/.shard/$gfid_dst.2
+EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}0/.shard/$gfid_dst
+EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}1/.shard/$gfid_dst
 TEST ! stat $M0/dir/src
 TEST   stat $M0/dir/dst
 TEST ! stat $B0/${V0}0/dir/src
 TEST ! stat $B0/${V0}1/dir/src
 TEST   stat $B0/${V0}0/dir/dst
 TEST   stat $B0/${V0}1/dir/dst
-TEST stat $B0/${V0}0/.shard/.remove_me/$gfid_dst
-TEST stat $B0/${V0}1/.shard/.remove_me/$gfid_dst
-
-EXPECT "0000000000400000" get_hex_xattr trusted.glusterfs.shard.block-size $B0/${V0}0/.shard/.remove_me/$gfid_dst
-EXPECT "0000000000400000" get_hex_xattr trusted.glusterfs.shard.block-size $B0/${V0}1/.shard/.remove_me/$gfid_dst
-EXPECT "0000000000900000000000000000000000000000000000000000000000000000" get_hex_xattr trusted.glusterfs.shard.file-size $B0/${V0}0/.shard/.remove_me/$gfid_dst
-EXPECT "0000000000900000000000000000000000000000000000000000000000000000" get_hex_xattr trusted.glusterfs.shard.file-size $B0/${V0}1/.shard/.remove_me/$gfid_dst
+EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}0/.shard/.remove_me/$gfid_dst
+EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}1/.shard/.remove_me/$gfid_dst
 
 ###################################################
 ##### Rename of dst file with /.shard present #####
@@ -245,13 +212,8 @@ TEST ! stat $B0/${V0}0/dir/src
 TEST ! stat $B0/${V0}1/dir/src
 TEST   stat $B0/${V0}0/dir/dst
 TEST   stat $B0/${V0}1/dir/dst
-TEST stat $B0/${V0}0/.shard/.remove_me/$gfid_dst
-TEST stat $B0/${V0}1/.shard/.remove_me/$gfid_dst
-
-EXPECT "0000000000400000" get_hex_xattr trusted.glusterfs.shard.block-size $B0/${V0}0/.shard/.remove_me/$gfid_dst
-EXPECT "0000000000400000" get_hex_xattr trusted.glusterfs.shard.block-size $B0/${V0}1/.shard/.remove_me/$gfid_dst
-EXPECT "0000000000500000000000000000000000000000000000000000000000000000" get_hex_xattr trusted.glusterfs.shard.file-size $B0/${V0}0/.shard/.remove_me/$gfid_dst
-EXPECT "0000000000500000000000000000000000000000000000000000000000000000" get_hex_xattr trusted.glusterfs.shard.file-size $B0/${V0}1/.shard/.remove_me/$gfid_dst
+EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}0/.shard/.remove_me/$gfid_dst
+EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}1/.shard/.remove_me/$gfid_dst
 
 ###############################################################
 ##### Rename of dst file with only one block (the zeroth) #####
@@ -268,13 +230,8 @@ TEST ! stat $B0/${V0}0/dir/src
 TEST ! stat $B0/${V0}1/dir/src
 TEST   stat $B0/${V0}0/dir/dst
 TEST   stat $B0/${V0}1/dir/dst
-TEST stat $B0/${V0}0/.shard/.remove_me/$gfid_dst
-TEST stat $B0/${V0}1/.shard/.remove_me/$gfid_dst
-
-EXPECT "0000000000400000" get_hex_xattr trusted.glusterfs.shard.block-size $B0/${V0}0/.shard/.remove_me/$gfid_dst
-EXPECT "0000000000400000" get_hex_xattr trusted.glusterfs.shard.block-size $B0/${V0}1/.shard/.remove_me/$gfid_dst
-EXPECT "0000000000100000000000000000000000000000000000000000000000000000" get_hex_xattr trusted.glusterfs.shard.file-size $B0/${V0}0/.shard/.remove_me/$gfid_dst
-EXPECT "0000000000100000000000000000000000000000000000000000000000000000" get_hex_xattr trusted.glusterfs.shard.file-size $B0/${V0}1/.shard/.remove_me/$gfid_dst
+EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}0/.shard/.remove_me/$gfid_dst
+EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}1/.shard/.remove_me/$gfid_dst
 
 ########################################################
 ##### Rename to a dst sharded file with hard-links #####
@@ -307,20 +264,18 @@ TEST ! stat $B0/${V0}1/.shard/.remove_me/$gfid_dst
 TEST touch $M0/dir/src2
 TEST mv -f $M0/dir/src2 $M0/link
 # Ensure that the shards are all cleaned up.
-#TEST ! stat $B0/${V0}0/.shard/$gfid_dst.1
-#TEST ! stat $B0/${V0}1/.shard/$gfid_dst.1
-#TEST ! stat $B0/${V0}0/.shard/$gfid_dst.2
-#TEST ! stat $B0/${V0}1/.shard/$gfid_dst.2
+EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}0/.shard/$gfid_dst
+EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}1/.shard/$gfid_dst
+TEST ! stat $B0/${V0}0/.shard/$gfid_dst.1
+TEST ! stat $B0/${V0}1/.shard/$gfid_dst.1
+TEST ! stat $B0/${V0}0/.shard/$gfid_dst.2
+TEST ! stat $B0/${V0}1/.shard/$gfid_dst.2
 TEST ! stat $M0/dir/src2
 TEST ! stat $B0/${V0}0/dir/src2
 TEST ! stat $B0/${V0}1/dir/src2
-TEST stat $B0/${V0}0/.shard/.remove_me/$gfid_dst
-TEST stat $B0/${V0}1/.shard/.remove_me/$gfid_dst
+EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}0/.shard/.remove_me/$gfid_dst
+EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}1/.shard/.remove_me/$gfid_dst
 
-EXPECT "0000000000400000" get_hex_xattr trusted.glusterfs.shard.block-size $B0/${V0}0/.shard/.remove_me/$gfid_dst
-EXPECT "0000000000400000" get_hex_xattr trusted.glusterfs.shard.block-size $B0/${V0}1/.shard/.remove_me/$gfid_dst
-EXPECT "0000000000900000000000000000000000000000000000000000000000000000" get_hex_xattr trusted.glusterfs.shard.file-size $B0/${V0}0/.shard/.remove_me/$gfid_dst
-EXPECT "0000000000900000000000000000000000000000000000000000000000000000" get_hex_xattr trusted.glusterfs.shard.file-size $B0/${V0}1/.shard/.remove_me/$gfid_dst
 # Rename with non-existent dst and a sharded src
 TEST touch $M0/dir/src
 TEST dd if=/dev/zero of=$M0/dir/src bs=1024 count=9216
diff --git a/xlators/features/shard/src/shard-messages.h b/xlators/features/shard/src/shard-messages.h
index 0267f8a..bc04e5e 100644
--- a/xlators/features/shard/src/shard-messages.h
+++ b/xlators/features/shard/src/shard-messages.h
@@ -40,7 +40,7 @@
  */
 
 #define GLFS_COMP_BASE_SHARD      GLFS_MSGID_COMP_SHARD
-#define GLFS_NUM_MESSAGES         20
+#define GLFS_NUM_MESSAGES         22
 #define GLFS_MSGID_END          (GLFS_COMP_BASE_SHARD + GLFS_NUM_MESSAGES + 1)
 
 #define glfs_msg_start_x GLFS_COMP_BASE_SHARD, "Invalid: Start of messages"
@@ -58,7 +58,7 @@
  * @diagnosis
  * @recommendedaction
  */
-#define SHARD_MSG_DICT_SET_FAILED                     (GLFS_COMP_BASE_SHARD + 2)
+#define SHARD_MSG_DICT_OP_FAILED                     (GLFS_COMP_BASE_SHARD + 2)
 
 
 /*!
@@ -194,5 +194,19 @@
 */
 #define SHARD_MSG_FOP_FAILED                         (GLFS_COMP_BASE_SHARD + 20)
 
+/*!
+ * @messageid 133021
+ * @diagnosis
+ * @recommendedaction
+*/
+#define SHARD_MSG_SHARDS_DELETION_FAILED             (GLFS_COMP_BASE_SHARD + 21)
+
+/*!
+ * @messageid 133022
+ * @diagnosis
+ * @recommendedaction
+*/
+#define SHARD_MSG_SHARDS_DELETION_COMPLETED          (GLFS_COMP_BASE_SHARD + 22)
+
 #define glfs_msg_end_x GLFS_MSGID_END, "Invalid: End of messages"
 #endif /* !_SHARD_MESSAGES_H_ */
diff --git a/xlators/features/shard/src/shard.c b/xlators/features/shard/src/shard.c
index 492341c..2faf711 100644
--- a/xlators/features/shard/src/shard.c
+++ b/xlators/features/shard/src/shard.c
@@ -677,7 +677,8 @@ __shard_update_shards_inode_list (inode_t *linked_inode, xlator_t *this,
                          * keep it alive by holding a ref on it.
                          */
                         inode_ref (linked_inode);
-                        gf_uuid_copy (ctx->base_gfid, base_inode->gfid);
+                        if (base_inode)
+                                gf_uuid_copy (ctx->base_gfid, base_inode->gfid);
                         ctx->block_num = block_num;
                         list_add_tail (&ctx->ilist, &priv->ilist_head);
                         priv->inode_count++;
@@ -738,7 +739,8 @@ __shard_update_shards_inode_list (inode_t *linked_inode, xlator_t *this,
                          * keep it alive by holding a ref on it.
                          */
                         inode_ref (linked_inode);
-                        gf_uuid_copy (ctx->base_gfid, base_inode->gfid);
+                        if (base_inode)
+                                gf_uuid_copy (ctx->base_gfid, base_inode->gfid);
                         ctx->block_num = block_num;
                         ctx->base_inode = base_inode;
                         list_add_tail (&ctx->ilist, &priv->ilist_head);
@@ -977,6 +979,7 @@ shard_common_resolve_shards (call_frame_t *frame, xlator_t *this,
         int                   i              = -1;
         uint32_t              shard_idx_iter = 0;
         char                  path[PATH_MAX] = {0,};
+        uuid_t                gfid           = {0,};
         inode_t              *inode          = NULL;
         inode_t              *res_inode      = NULL;
         inode_t              *fsync_inode    = NULL;
@@ -988,6 +991,10 @@ shard_common_resolve_shards (call_frame_t *frame, xlator_t *this,
         local->call_count = 0;
         shard_idx_iter = local->first_block;
         res_inode = local->resolver_base_inode;
+        if (res_inode)
+                gf_uuid_copy (gfid, res_inode->gfid);
+        else
+                gf_uuid_copy (gfid, local->base_gfid);
 
         if ((local->op_ret < 0) || (local->resolve_not))
                 goto out;
@@ -1000,7 +1007,7 @@ shard_common_resolve_shards (call_frame_t *frame, xlator_t *this,
                         continue;
                 }
 
-                shard_make_block_abspath (shard_idx_iter, res_inode->gfid, path,
+                shard_make_block_abspath (shard_idx_iter, gfid, path,
                                           sizeof(path));
 
                 inode = NULL;
@@ -1147,7 +1154,7 @@ shard_update_file_size (call_frame_t *frame, xlator_t *this, fd_t *fd,
         ret = dict_set_bin (xattr_req, GF_XATTR_SHARD_FILE_SIZE, size_attr,
                             8 * 4);
         if (ret) {
-                gf_msg (this->name, GF_LOG_ERROR, 0, SHARD_MSG_DICT_SET_FAILED,
+                gf_msg (this->name, GF_LOG_ERROR, 0, SHARD_MSG_DICT_OP_FAILED,
                         "Failed to set key %s into dict. gfid=%s",
                         GF_XATTR_SHARD_FILE_SIZE, uuid_utoa (inode->gfid));
                 GF_FREE (size_attr);
@@ -1376,7 +1383,7 @@ shard_lookup_internal_dir (call_frame_t *frame, xlator_t *this,
 
         ret = dict_set_bin (xattr_req, "gfid-req", *gfid, 16);
         if (ret) {
-                gf_msg (this->name, GF_LOG_ERROR, 0, SHARD_MSG_DICT_SET_FAILED,
+                gf_msg (this->name, GF_LOG_ERROR, 0, SHARD_MSG_DICT_OP_FAILED,
                         "Failed to set gfid of %s into dict",
                         shard_internal_dir_string (type));
                 local->op_ret = -1;
@@ -1431,10 +1438,49 @@ shard_inode_ctx_update (inode_t *inode, xlator_t *this, dict_t *xdata,
 }
 
 int
+shard_delete_shards (void *opaque);
+
+int
+shard_delete_shards_cbk (int ret, call_frame_t *frame, void *data);
+
+int
+shard_start_background_deletion (xlator_t *this)
+{
+        int              ret           = 0;
+        call_frame_t    *cleanup_frame = NULL;
+
+        cleanup_frame = create_frame (this, this->ctx->pool);
+        if (!cleanup_frame) {
+                gf_msg (this->name, GF_LOG_WARNING, ENOMEM,
+                        SHARD_MSG_MEMALLOC_FAILED, "Failed to create "
+                        "new frame to delete shards");
+                return -ENOMEM;
+        }
+
+        ret = synctask_new (this->ctx->env, shard_delete_shards,
+                            shard_delete_shards_cbk, cleanup_frame,
+                            cleanup_frame);
+        if (ret < 0) {
+                gf_msg (this->name, GF_LOG_WARNING, errno,
+                        SHARD_MSG_SHARDS_DELETION_FAILED,
+                        "failed to create task to do background "
+                        "cleanup of shards");
+                STACK_DESTROY (cleanup_frame->root);
+        }
+        return ret;
+}
+
+int
 shard_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
                   int32_t op_ret, int32_t op_errno, inode_t *inode,
                   struct iatt *buf, dict_t *xdata, struct iatt *postparent)
 {
+        int             ret             = 0;
+        shard_priv_t   *priv            = NULL;
+        gf_boolean_t    i_start_cleanup = _gf_false;
+
+        priv = this->private;
+
         if (op_ret < 0)
                 goto unwind;
 
@@ -1460,6 +1506,25 @@ shard_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
 
         (void) shard_inode_ctx_update (inode, this, xdata, buf);
 
+        LOCK (&priv->lock);
+        {
+                if (priv->first_lookup == SHARD_FIRST_LOOKUP_PENDING) {
+                        priv->first_lookup = SHARD_FIRST_LOOKUP_IN_PROGRESS;
+                        i_start_cleanup = _gf_true;
+                }
+        }
+        UNLOCK (&priv->lock);
+
+        if (i_start_cleanup) {
+                ret = shard_start_background_deletion (this);
+                if (ret) {
+                        LOCK (&priv->lock);
+                        {
+                                priv->first_lookup = SHARD_FIRST_LOOKUP_PENDING;
+                        }
+                        UNLOCK (&priv->lock);
+                }
+        }
 unwind:
         SHARD_STACK_UNWIND (lookup, frame, op_ret, op_errno, inode, buf,
                             xdata, postparent);
@@ -1475,6 +1540,7 @@ shard_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc,
         uint64_t        block_size = 0;
         shard_local_t  *local      = NULL;
 
+        this->itable = loc->inode->table;
         if (frame->root->pid != GF_CLIENT_PID_GSYNCD) {
                 SHARD_ENTRY_FOP_CHECK (loc, op_errno, err);
         }
@@ -1496,7 +1562,7 @@ shard_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc,
                                        GF_XATTR_SHARD_BLOCK_SIZE, 0);
                 if (ret) {
                         gf_msg (this->name, GF_LOG_WARNING, 0,
-                                SHARD_MSG_DICT_SET_FAILED, "Failed to set dict"
+                                SHARD_MSG_DICT_OP_FAILED, "Failed to set dict"
                                 " value: key:%s for path %s",
                                 GF_XATTR_SHARD_BLOCK_SIZE, loc->path);
                         goto err;
@@ -1508,7 +1574,7 @@ shard_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc,
                                        GF_XATTR_SHARD_FILE_SIZE, 8 * 4);
                 if (ret) {
                         gf_msg (this->name, GF_LOG_WARNING, 0,
-                                SHARD_MSG_DICT_SET_FAILED,
+                                SHARD_MSG_DICT_OP_FAILED,
                                 "Failed to set dict value: key:%s for path %s.",
                                 GF_XATTR_SHARD_FILE_SIZE, loc->path);
                         goto err;
@@ -1901,12 +1967,6 @@ shard_truncate_last_shard (call_frame_t *frame, xlator_t *this, inode_t *inode)
         return 0;
 }
 
-int
-shard_unlink_shards_do_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
-                            int32_t op_ret, int32_t op_errno,
-                            struct iatt *preparent, struct iatt *postparent,
-                            dict_t *xdata);
-
 void
 shard_unlink_block_inode (shard_local_t *local, int shard_block_num);
 
@@ -1941,17 +2001,17 @@ done:
 int
 shard_truncate_htol (call_frame_t *frame, xlator_t *this, inode_t *inode)
 {
-        int i = 1;
-        int ret = -1;
-        int call_count = 0;
-        uint32_t cur_block = 0;
-        uint32_t last_block = 0;
-        char path[PATH_MAX] = {0,};
-        char *bname = NULL;
-        loc_t loc = {0,};
-        gf_boolean_t wind_failed = _gf_false;
-        shard_local_t *local = NULL;
-        shard_priv_t *priv = NULL;
+        int             i              = 1;
+        int             ret            = -1;
+        int             call_count     = 0;
+        uint32_t        cur_block      = 0;
+        uint32_t        last_block     = 0;
+        char            path[PATH_MAX] = {0,};
+        char           *bname          = NULL;
+        loc_t           loc            = {0,};
+        gf_boolean_t    wind_failed    = _gf_false;
+        shard_local_t  *local          = NULL;
+        shard_priv_t   *priv           = NULL;
 
         local = frame->local;
         priv = this->private;
@@ -2086,6 +2146,7 @@ shard_link_block_inode (shard_local_t *local, int block_num, inode_t *inode,
 {
         int             list_index       = 0;
         char            block_bname[256] = {0,};
+        uuid_t          gfid             = {0,};
         inode_t        *linked_inode     = NULL;
         xlator_t       *this             = NULL;
         inode_t        *fsync_inode      = NULL;
@@ -2093,9 +2154,12 @@ shard_link_block_inode (shard_local_t *local, int block_num, inode_t *inode,
 
         this = THIS;
         priv = this->private;
+        if (local->loc.inode)
+                gf_uuid_copy (gfid, local->loc.inode->gfid);
+        else
+                gf_uuid_copy (gfid, local->base_gfid);
 
-        shard_make_block_bname (block_num, (local->loc.inode)->gfid,
-                                block_bname, sizeof (block_bname));
+        shard_make_block_bname (block_num, gfid, block_bname, sizeof (block_bname));
 
         shard_inode_ctx_set (inode, this, buf, 0, SHARD_LOOKUP_MASK);
         linked_inode = inode_link (inode, priv->dot_shard_inode, block_bname,
@@ -2125,9 +2189,14 @@ shard_common_lookup_shards_cbk (call_frame_t *frame, void *cookie,
 {
         int             call_count      = 0;
         int             shard_block_num = (long) cookie;
+        uuid_t          gfid            = {0,};
         shard_local_t  *local           = NULL;
 
         local = frame->local;
+        if (local->resolver_base_inode)
+                gf_uuid_copy (gfid, local->resolver_base_inode->gfid);
+        else
+                gf_uuid_copy (gfid, local->base_gfid);
 
         if (op_ret < 0) {
                 /* Ignore absence of shards in the backend in truncate fop. */
@@ -2162,9 +2231,7 @@ shard_common_lookup_shards_cbk (call_frame_t *frame, void *cookie,
                 gf_msg (this->name, GF_LOG_ERROR, op_errno,
                         SHARD_MSG_LOOKUP_SHARD_FAILED, "Lookup on shard %d "
                         "failed. Base file gfid = %s", shard_block_num,
-                        (local->fop == GF_FOP_RENAME) ?
-                        uuid_utoa (local->loc2.inode->gfid)
-                        : uuid_utoa (local->loc.inode->gfid));
+                        uuid_utoa (gfid));
                 local->op_ret = op_ret;
                 local->op_errno = op_errno;
                 goto done;
@@ -2173,25 +2240,18 @@ shard_common_lookup_shards_cbk (call_frame_t *frame, void *cookie,
         shard_link_block_inode (local, shard_block_num, inode, buf);
 
 done:
-        call_count = shard_call_count_return (frame);
         if (local->lookup_shards_barriered) {
                 syncbarrier_wake (&local->barrier);
                 return 0;
         } else {
+                call_count = shard_call_count_return (frame);
                 if (call_count == 0) {
                         if (!local->first_lookup_done)
                                 local->first_lookup_done = _gf_true;
-                        if (local->op_ret < 0)
-                                goto unwind;
-                        else
-                                local->pls_fop_handler (frame, this);
+                        local->pls_fop_handler (frame, this);
                 }
         }
         return 0;
-
-unwind:
-        local->pls_fop_handler (frame, this);
-        return 0;
 }
 
 dict_t*
@@ -2237,6 +2297,7 @@ shard_common_lookup_shards (call_frame_t *frame, xlator_t *this, inode_t *inode,
         int            last_block     = 0;
         char           path[PATH_MAX] = {0,};
         char          *bname          = NULL;
+        uuid_t         gfid           = {0,};
         loc_t          loc            = {0,};
         shard_local_t *local          = NULL;
         shard_priv_t  *priv           = NULL;
@@ -2252,6 +2313,11 @@ shard_common_lookup_shards (call_frame_t *frame, xlator_t *this, inode_t *inode,
         if (local->lookup_shards_barriered)
                 local->barrier.waitfor = local->call_count;
 
+        if (inode)
+                gf_uuid_copy (gfid, inode->gfid);
+        else
+                gf_uuid_copy (gfid, local->base_gfid);
+
         while (shard_idx_iter <= last_block) {
                 if (local->inode_list[i]) {
                         i++;
@@ -2267,7 +2333,7 @@ shard_common_lookup_shards (call_frame_t *frame, xlator_t *this, inode_t *inode,
                         goto next;
                 }
 
-                shard_make_block_abspath (shard_idx_iter, inode->gfid, path,
+                shard_make_block_abspath (shard_idx_iter, gfid, path,
                                           sizeof(path));
 
                 bname = strrchr (path, '/') + 1;
@@ -2279,7 +2345,7 @@ shard_common_lookup_shards (call_frame_t *frame, xlator_t *this, inode_t *inode,
                         gf_msg (this->name, GF_LOG_ERROR, 0,
                                 SHARD_MSG_INODE_PATH_FAILED, "Inode path failed"
                                 " on %s, base file gfid = %s", bname,
-                                uuid_utoa (inode->gfid));
+                                uuid_utoa (gfid));
                         local->op_ret = -1;
                         local->op_errno = ENOMEM;
                         loc_wipe (&loc);
@@ -2322,8 +2388,10 @@ next:
                 if (!--call_count)
                         break;
         }
-        if (local->lookup_shards_barriered)
+        if (local->lookup_shards_barriered) {
                 syncbarrier_wait (&local->barrier, count);
+                local->pls_fop_handler (frame, this);
+        }
         return 0;
 }
 
@@ -2779,8 +2847,9 @@ shard_post_lookup_shards_unlink_handler (call_frame_t *frame, xlator_t *this)
         local = frame->local;
 
         if ((local->op_ret < 0) && (local->op_errno != ENOENT)) {
-                shard_common_failure_unwind (local->fop, frame, local->op_ret,
-                                             local->op_errno);
+                gf_msg (this->name, GF_LOG_ERROR, local->op_errno,
+                        SHARD_MSG_FOP_FAILED, "failed to delete shards of %s",
+                        uuid_utoa (local->resolver_base_inode->gfid));
                 return 0;
         }
         local->op_ret = 0;
@@ -2791,41 +2860,12 @@ shard_post_lookup_shards_unlink_handler (call_frame_t *frame, xlator_t *this)
 }
 
 int
-shard_rename_cbk (call_frame_t *frame, xlator_t *this);
-
-int32_t
-shard_unlink_cbk (call_frame_t *frame, xlator_t *this);
-
-int
 shard_post_resolve_unlink_handler (call_frame_t *frame, xlator_t *this)
 {
         shard_local_t *local = NULL;
 
         local = frame->local;
-
-        if (local->op_ret < 0) {
-                if (local->op_errno == ENOENT) {
-                        /* If lookup on /.shard fails with ENOENT, it probably
-                         * means that the file is being unlinked before it
-                         * could grow beyond its first block. In this case,
-                         * unlink boils down to unlinking the base file and
-                         * unwinding the call.
-                         */
-                        local->op_ret = 0;
-                        local->first_block = local->last_block = 0;
-                        local->num_blocks = 1;
-                        if (local->fop == GF_FOP_UNLINK)
-                                shard_unlink_cbk (frame, this);
-                        else
-                                shard_rename_cbk (frame, this);
-                        return 0;
-                } else {
-                        shard_common_failure_unwind (local->fop, frame,
-                                                     local->op_ret,
-                                                     local->op_errno);
-                        return 0;
-                }
-        }
+        local->lookup_shards_barriered = _gf_true;
 
         if (!local->call_count)
                 shard_unlink_shards_do (frame, this,
@@ -2841,6 +2881,7 @@ void
 shard_unlink_block_inode (shard_local_t *local, int shard_block_num)
 {
         char                  block_bname[256]  = {0,};
+        uuid_t                gfid              = {0,};
         inode_t              *inode             = NULL;
         inode_t              *base_inode        = NULL;
         xlator_t             *this              = NULL;
@@ -2854,12 +2895,17 @@ shard_unlink_block_inode (shard_local_t *local, int shard_block_num)
 
         inode = local->inode_list[shard_block_num - local->first_block];
         base_inode = local->resolver_base_inode;
+        if (base_inode)
+                gf_uuid_copy (gfid, base_inode->gfid);
+        else
+                gf_uuid_copy (gfid, local->base_gfid);
 
-        shard_make_block_bname (shard_block_num, (local->loc.inode)->gfid,
+        shard_make_block_bname (shard_block_num, gfid,
                                 block_bname, sizeof (block_bname));
 
         LOCK(&priv->lock);
-        LOCK(&base_inode->lock);
+        if (base_inode)
+                LOCK(&base_inode->lock);
         LOCK(&inode->lock);
         {
                 __shard_inode_ctx_get (inode, this, &ctx);
@@ -2870,14 +2916,18 @@ shard_unlink_block_inode (shard_local_t *local, int shard_block_num)
                         unlink_unref_forget = _gf_true;
                 }
                 if (ctx->fsync_needed) {
-                        inode_unref (base_inode);
+                        if (base_inode)
+                                inode_unref (base_inode);
                         list_del_init (&ctx->to_fsync_list);
-                        __shard_inode_ctx_get (base_inode, this, &base_ictx);
-                        base_ictx->fsync_count--;
+                        if (base_inode) {
+                                __shard_inode_ctx_get (base_inode, this, &base_ictx);
+                                base_ictx->fsync_count--;
+                        }
                 }
         }
         UNLOCK(&inode->lock);
-        UNLOCK(&base_inode->lock);
+        if (base_inode)
+                UNLOCK(&base_inode->lock);
         if (unlink_unref_forget) {
                 inode_unlink (inode, priv->dot_shard_inode, block_bname);
                 inode_unref (inode);
@@ -2887,7 +2937,18 @@ shard_unlink_block_inode (shard_local_t *local, int shard_block_num)
 }
 
 int
-shard_rename_cbk (call_frame_t *frame, xlator_t *this);
+shard_rename_cbk (call_frame_t *frame, xlator_t *this)
+{
+        shard_local_t *local = NULL;
+
+        local = frame->local;
+
+        SHARD_STACK_UNWIND (rename, frame, local->op_ret, local->op_errno,
+                            &local->prebuf, &local->preoldparent,
+                            &local->postoldparent, &local->prenewparent,
+                            &local->postnewparent, local->xattr_rsp);
+        return 0;
+}
 
 int32_t
 shard_unlink_cbk (call_frame_t *frame, xlator_t *this)
@@ -2906,7 +2967,6 @@ shard_unlink_shards_do_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
                             struct iatt *preparent, struct iatt *postparent,
                             dict_t *xdata)
 {
-        int            call_count      = 0;
         int            shard_block_num = (long) cookie;
         shard_local_t *local           = NULL;
 
@@ -2919,22 +2979,8 @@ shard_unlink_shards_do_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
         }
 
         shard_unlink_block_inode (local, shard_block_num);
-
 done:
-        call_count = shard_call_count_return (frame);
-        if (local->unlink_shards_barriered) {
-                syncbarrier_wake (&local->barrier);
-        } else {
-
-                if (call_count == 0) {
-                        SHARD_UNSET_ROOT_FS_ID (frame, local);
-
-                        if (local->fop == GF_FOP_UNLINK)
-                                shard_unlink_cbk (frame, this);
-                        else if (local->fop == GF_FOP_RENAME)
-                                shard_rename_cbk (frame, this);
-                }
-        }
+        syncbarrier_wake (&local->barrier);
         return 0;
 }
 
@@ -2944,11 +2990,11 @@ shard_unlink_shards_do (call_frame_t *frame, xlator_t *this, inode_t *inode)
         int               i              = 0;
         int               ret            = -1;
         int               count          = 0;
-        int               call_count     = 0;
-        uint32_t          last_block     = 0;
         uint32_t          cur_block      = 0;
+        uint32_t          cur_block_idx  = 0; /* index into inode_list[] array */
         char             *bname          = NULL;
         char              path[PATH_MAX] = {0,};
+        uuid_t           gfid            = {0,};
         loc_t             loc            = {0,};
         gf_boolean_t      wind_failed    = _gf_false;
         shard_local_t    *local          = NULL;
@@ -2957,16 +3003,12 @@ shard_unlink_shards_do (call_frame_t *frame, xlator_t *this, inode_t *inode)
         priv = this->private;
         local = frame->local;
 
-        /* local->num_blocks includes the base file block. This function only
-         * deletes the shards under /.shard. So subtract num_blocks by 1.
-         */
-        local->call_count = call_count = local->num_blocks - 1;
-        last_block = local->last_block;
+        if (inode)
+                gf_uuid_copy (gfid, inode->gfid);
+        else
+                gf_uuid_copy (gfid, local->base_gfid);
 
-        /* Ignore the inode associated with the base file and start counting
-         * from 1.
-         */
-        for (i = 1; i < local->num_blocks; i++) {
+        for (i = 0; i < local->num_blocks; i++) {
                 if (!local->inode_list[i])
                         continue;
                 count++;
@@ -2975,35 +3017,21 @@ shard_unlink_shards_do (call_frame_t *frame, xlator_t *this, inode_t *inode)
         if (!count) {
                 /* callcount = 0 implies that all of the shards that need to be
                  * unlinked are non-existent (in other words the file is full of
-                 * holes). So shard xlator can simply return the fop to its
-                 * parent now.
+                 * holes).
                  */
                 gf_msg_debug (this->name, 0, "All shards that need to be "
                               "unlinked are non-existent: %s",
-                              uuid_utoa (inode->gfid));
-                local->num_blocks = 1;
-                if (local->fop == GF_FOP_UNLINK) {
-                        shard_unlink_cbk (frame, this);
-                } else if (local->fop == GF_FOP_RENAME) {
-                        gf_msg_debug (this->name, 0, "Resuming rename()");
-                        shard_rename_cbk (frame, this);
-                }
+                              uuid_utoa (gfid));
                 return 0;
         }
 
-        local->call_count = call_count = count;
-        cur_block = 1;
         SHARD_SET_ROOT_FS_ID (frame, local);
-        if (local->unlink_shards_barriered)
-                local->barrier.waitfor = count;
+        local->barrier.waitfor = count;
+        cur_block = cur_block_idx + local->first_block;
 
-        /* Ignore the base file and start iterating from the first block shard.
-         */
-        while (cur_block <= last_block) {
-                if (!local->inode_list[cur_block]) {
-                        cur_block++;
-                        continue;
-                }
+        while (cur_block_idx < local->num_blocks) {
+                if (!local->inode_list[cur_block_idx])
+                        goto next;
 
                 if (wind_failed) {
                         shard_unlink_shards_do_cbk (frame,
@@ -3013,8 +3041,7 @@ shard_unlink_shards_do (call_frame_t *frame, xlator_t *this, inode_t *inode)
                         goto next;
                 }
 
-                shard_make_block_abspath (cur_block, inode->gfid, path,
-                                          sizeof (path));
+                shard_make_block_abspath (cur_block, gfid, path, sizeof (path));
                 bname = strrchr (path, '/') + 1;
                 loc.parent = inode_ref (priv->dot_shard_inode);
                 ret = inode_path (loc.parent, bname, (char **) &(loc.path));
@@ -3022,7 +3049,7 @@ shard_unlink_shards_do (call_frame_t *frame, xlator_t *this, inode_t *inode)
                         gf_msg (this->name, GF_LOG_ERROR, 0,
                                 SHARD_MSG_INODE_PATH_FAILED, "Inode path failed"
                                 " on %s, base file gfid = %s", bname,
-                                uuid_utoa (inode->gfid));
+                                uuid_utoa (gfid));
                         local->op_ret = -1;
                         local->op_errno = ENOMEM;
                         loc_wipe (&loc);
@@ -3037,26 +3064,505 @@ shard_unlink_shards_do (call_frame_t *frame, xlator_t *this, inode_t *inode)
                 loc.name = strrchr (loc.path, '/');
                 if (loc.name)
                         loc.name++;
-                loc.inode = inode_ref (local->inode_list[cur_block]);
+                loc.inode = inode_ref (local->inode_list[cur_block_idx]);
 
                 STACK_WIND_COOKIE (frame, shard_unlink_shards_do_cbk,
                                    (void *) (long) cur_block, FIRST_CHILD(this),
                                    FIRST_CHILD (this)->fops->unlink, &loc,
                                    local->xflag, local->xattr_req);
                 loc_wipe (&loc);
-
 next:
                 cur_block++;
-                if (!--call_count)
-                        break;
+                cur_block_idx++;
         }
-        if (local->unlink_shards_barriered)
-                syncbarrier_wait (&local->barrier, count);
+        syncbarrier_wait (&local->barrier, count);
+        SHARD_UNSET_ROOT_FS_ID (frame, local);
+        return 0;
+}
+
+int
+shard_regulated_shards_deletion (call_frame_t *cleanup_frame, xlator_t *this,
+                                 int now, int first_block, gf_dirent_t *entry)
+{
+        int            i     = 0;
+        int            ret   = 0;
+        shard_local_t *local = NULL;
+        uuid_t         gfid  = {0,};
+
+        local = cleanup_frame->local;
+
+        local->inode_list = GF_CALLOC (now, sizeof (inode_t *),
+                                       gf_shard_mt_inode_list);
+        if (!local->inode_list)
+                return -ENOMEM;
+
+        local->first_block = first_block;
+        local->last_block = first_block + now - 1;
+        local->num_blocks = now;
+        gf_uuid_parse (entry->d_name, gfid);
+        gf_uuid_copy (local->base_gfid, gfid);
+        local->resolver_base_inode = inode_find (this->itable, gfid);
+        local->call_count = 0;
+        syncbarrier_init (&local->barrier);
+
+        shard_common_resolve_shards (cleanup_frame, this,
+                                     shard_post_resolve_unlink_handler);
+
+        for (i = 0; i < local->num_blocks; i++) {
+                if (local->inode_list[i])
+                        inode_unref (local->inode_list[i]);
+        }
+        GF_FREE (local->inode_list);
+        local->inode_list = NULL;
+        if (local->op_ret)
+                ret = -local->op_errno;
+        syncbarrier_destroy (&local->barrier);
+        inode_unref (local->resolver_base_inode);
+        local->resolver_base_inode = NULL;
+        STACK_RESET (cleanup_frame->root);
+        return ret;
+}
+
+
+int
+__shard_delete_shards_of_entry (call_frame_t *cleanup_frame, xlator_t *this,
+                                gf_dirent_t *entry, inode_t *inode)
+{
+        int              ret           = 0;
+        int              shard_count   = 0;
+        int              first_block   = 0;
+        int              now           = 0;
+        uint64_t         size          = 0;
+        uint64_t         block_size    = 0;
+        uint64_t         size_array[4] = {0,};
+        void            *bsize         = NULL;
+        void            *size_attr     = NULL;
+        dict_t          *xattr_rsp     = NULL;
+        loc_t            loc           = {0,};
+        shard_local_t   *local         = NULL;
+        shard_priv_t    *priv          = NULL;
 
+        priv = this->private;
+        local = cleanup_frame->local;
+        ret = dict_reset (local->xattr_req);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED,
+                        "Failed to reset dict");
+                ret = -ENOMEM;
+                goto err;
+        }
+
+        ret = dict_set_uint64 (local->xattr_req, GF_XATTR_SHARD_BLOCK_SIZE, 0);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED,
+                        "Failed to set dict value: key:%s",
+                        GF_XATTR_SHARD_BLOCK_SIZE);
+                ret = -ENOMEM;
+                goto err;
+        }
+
+        ret = dict_set_uint64 (local->xattr_req, GF_XATTR_SHARD_FILE_SIZE,
+                               8 * 4);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED,
+                        "Failed to set dict value: key:%s",
+                        GF_XATTR_SHARD_FILE_SIZE);
+                ret = -ENOMEM;
+                goto err;
+        }
+
+        loc.inode = inode_ref (inode);
+        loc.parent = inode_ref (priv->dot_shard_rm_inode);
+        ret = inode_path (loc.parent, entry->d_name, (char **)&(loc.path));
+        if (ret < 0) {
+                gf_msg (this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED,
+                        "Inode path failed on %s", entry->d_name);
+                ret = -ENOMEM;
+                goto err;
+        }
+
+        loc.name = strrchr (loc.path, '/');
+        if (loc.name)
+                loc.name++;
+        ret = syncop_lookup (FIRST_CHILD(this), &loc, NULL, NULL,
+                             local->xattr_req, &xattr_rsp);
+        if (ret)
+                goto err;
+
+        ret = dict_get_ptr (xattr_rsp, GF_XATTR_SHARD_BLOCK_SIZE, &bsize);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0, SHARD_MSG_DICT_OP_FAILED,
+                        "Failed to get dict value: key:%s",
+                        GF_XATTR_SHARD_BLOCK_SIZE);
+                goto err;
+        }
+        block_size = ntoh64 (*((uint64_t *)bsize));
+
+        ret = dict_get_ptr (xattr_rsp, GF_XATTR_SHARD_FILE_SIZE, &size_attr);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0, SHARD_MSG_DICT_OP_FAILED,
+                        "Failed to get dict value: key:%s",
+                        GF_XATTR_SHARD_FILE_SIZE);
+                goto err;
+        }
+
+        memcpy (size_array, size_attr, sizeof (size_array));
+        size = ntoh64 (size_array[0]);
+
+        shard_count = (size / block_size) - 1;
+        if (shard_count < 0) {
+                gf_msg_debug (this->name, 0, "Size of %s hasn't grown beyond "
+                              "its shard-block-size. Nothing to delete. "
+                              "Returning", entry->d_name);
+                /* File size < shard-block-size, so nothing to delete */
+                ret = 0;
+                goto delete_marker;
+        }
+        if ((size % block_size) > 0)
+                shard_count++;
+
+        if (shard_count == 0) {
+                gf_msg_debug (this->name, 0, "Size of %s is exactly equal to "
+                              "its shard-block-size. Nothing to delete. "
+                              "Returning", entry->d_name);
+                ret = 0;
+                goto delete_marker;
+        }
+        gf_msg_debug (this->name, 0, "base file = %s, "
+                      "shard-block-size=%"PRIu64", file-size=%"PRIu64", "
+                      "shard_count=%d", entry->d_name, block_size, size,
+                      shard_count);
+
+        /* Perform a gfid-based lookup to see if gfid corresponding to marker
+         * file's base name exists.
+         */
+        loc_wipe (&loc);
+        loc.inode = inode_new (this->itable);
+        if (!loc.inode) {
+                ret = -ENOMEM;
+                goto err;
+        }
+        gf_uuid_parse (entry->d_name, loc.gfid);
+        ret = syncop_lookup (FIRST_CHILD(this), &loc, NULL, NULL, NULL, NULL);
+        if (!ret) {
+                gf_msg_debug (this->name, 0, "Base shard corresponding to gfid "
+                              "%s is present. Skipping shard deletion. "
+                              "Returning", entry->d_name);
+                ret = 0;
+                goto delete_marker;
+        }
+
+        first_block = 1;
+
+        while (shard_count) {
+                if (shard_count < local->deletion_rate) {
+                        now = shard_count;
+                        shard_count = 0;
+                } else {
+                        now = local->deletion_rate;
+                        shard_count -= local->deletion_rate;
+                }
+
+                gf_msg_debug (this->name, 0, "deleting %d shards starting from "
+                              "block %d of gfid %s", now, first_block,
+                              entry->d_name);
+                ret = shard_regulated_shards_deletion (cleanup_frame, this,
+                                                       now, first_block,
+                                                       entry);
+                if (ret)
+                        goto err;
+                first_block += now;
+        }
+
+delete_marker:
+        loc_wipe (&loc);
+        loc.inode = inode_ref (inode);
+        loc.parent = inode_ref (priv->dot_shard_rm_inode);
+        ret = inode_path (loc.parent, entry->d_name, (char **)&(loc.path));
+        if (ret < 0) {
+                gf_msg (this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED,
+                        "Inode path failed on %s", entry->d_name);
+                ret = -ENOMEM;
+                goto err;
+        }
+        loc.name = strrchr (loc.path, '/');
+        if (loc.name)
+                loc.name++;
+        ret = syncop_unlink (FIRST_CHILD(this), &loc, NULL, NULL);
+        if (ret)
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        SHARD_MSG_SHARDS_DELETION_FAILED, "Failed to delete %s "
+                        "from /%s", entry->d_name, GF_SHARD_REMOVE_ME_DIR);
+err:
+        if (xattr_rsp)
+                dict_unref (xattr_rsp);
+        loc_wipe (&loc);
+        return ret;
+}
+
+int
+shard_delete_shards_of_entry (call_frame_t *cleanup_frame, xlator_t *this,
+                              gf_dirent_t *entry, inode_t *inode)
+{
+        int           ret  = -1;
+        loc_t         loc  = {0,};
+        shard_priv_t *priv = NULL;
+
+        priv = this->private;
+        loc.inode = inode_ref (priv->dot_shard_rm_inode);
+
+        ret = syncop_entrylk (FIRST_CHILD(this), this->name, &loc,
+                              entry->d_name, ENTRYLK_LOCK, ENTRYLK_WRLCK, NULL,
+                              NULL);
+        if (ret)
+                goto out;
+        {
+                ret = __shard_delete_shards_of_entry (cleanup_frame, this,
+                                                      entry, inode);
+        }
+        syncop_entrylk (FIRST_CHILD(this), this->name, &loc, entry->d_name,
+                        ENTRYLK_UNLOCK, ENTRYLK_WRLCK, NULL, NULL);
+out:
+        loc_wipe (&loc);
+        return ret;
+}
+
+int
+shard_delete_shards_cbk (int ret, call_frame_t *frame, void *data)
+{
+        xlator_t     *this          = NULL;
+        shard_priv_t *priv          = NULL;
+
+        this = frame->this;
+        priv = this->private;
+
+        if (ret < 0) {
+                gf_msg (this->name, GF_LOG_WARNING, -ret,
+                        SHARD_MSG_SHARDS_DELETION_FAILED,
+                        "Background deletion of shards failed");
+                priv->first_lookup = SHARD_FIRST_LOOKUP_PENDING;
+        } else {
+                priv->first_lookup = SHARD_FIRST_LOOKUP_DONE;
+        }
+        SHARD_STACK_DESTROY (frame);
         return 0;
 }
 
 int
+shard_resolve_internal_dir (xlator_t *this, shard_local_t *local,
+                            shard_internal_dir_type_t type)
+{
+        int                  ret   = 0;
+        char                *bname = NULL;
+        loc_t               *loc   = NULL;
+        shard_priv_t        *priv  = NULL;
+        uuid_t               gfid  = {0,};
+        struct iatt          stbuf = {0,};
+
+        priv = this->private;
+
+        switch (type) {
+        case SHARD_INTERNAL_DIR_DOT_SHARD:
+                loc = &local->dot_shard_loc;
+                gf_uuid_copy (gfid, priv->dot_shard_gfid);
+                bname = GF_SHARD_DIR;
+                break;
+        case SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME:
+                loc = &local->dot_shard_rm_loc;
+                gf_uuid_copy (gfid, priv->dot_shard_rm_gfid);
+                bname = GF_SHARD_REMOVE_ME_DIR;
+                break;
+        default:
+                break;
+        }
+
+        loc->inode = inode_find (this->itable, gfid);
+        if (!loc->inode) {
+                ret = shard_init_internal_dir_loc (this, local, type);
+                if (ret)
+                        goto err;
+                ret = dict_reset (local->xattr_req);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_WARNING, 0,
+                                SHARD_MSG_DICT_OP_FAILED, "Failed to reset "
+                                "dict");
+                        ret = -ENOMEM;
+                        goto err;
+                }
+                ret = dict_set_static_bin (local->xattr_req, "gfid-req", gfid,
+                                           16);
+                ret = syncop_lookup (FIRST_CHILD(this), loc, &stbuf, NULL,
+                                     local->xattr_req, NULL);
+                if (ret < 0) {
+                        if (ret != -ENOENT)
+                                gf_msg (this->name, GF_LOG_ERROR, -ret,
+                                        SHARD_MSG_SHARDS_DELETION_FAILED,
+                                        "Lookup on %s failed, exiting", bname);
+                        goto err;
+                } else {
+                        shard_link_internal_dir_inode (local,
+                                                       loc->inode, &stbuf,
+                                                       type);
+                }
+        }
+        ret = 0;
+err:
+        return ret;
+}
+
+/* shard_lookup_marker_entry: resolve a .remove_me marker entry whose inode
+ * was not linked by readdirp.
+ *
+ * Builds a loc_t for @entry under the directory inode backing @local->fd,
+ * issues a synchronous lookup on the first child xlator, and on success
+ * stores a ref'd inode in entry->inode for the caller to link and use.
+ *
+ * Returns 0 on success; a negative errno-style value on failure.
+ */
+int
+shard_lookup_marker_entry (xlator_t *this, shard_local_t *local,
+                           gf_dirent_t *entry)
+{
+        int   ret = 0;
+        loc_t loc = {0,};
+
+        loc.inode = inode_new (this->itable);
+        if (!loc.inode) {
+                ret = -ENOMEM;
+                goto err;
+        }
+        /* Parent is the directory local->fd refers to (.shard/.remove_me in
+         * the deletion path); take a ref since loc_wipe() below drops it. */
+        loc.parent = inode_ref (local->fd->inode);
+
+        ret = inode_path (loc.parent, entry->d_name, (char **)&(loc.path));
+        if (ret < 0) {
+                gf_msg (this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED,
+                        "Inode path failed on %s", entry->d_name);
+                ret = -ENOMEM;
+                goto err;
+        }
+
+        /* Point loc.name at the basename component inside loc.path. */
+        loc.name = strrchr (loc.path, '/');
+        if (loc.name)
+                loc.name++;
+
+        ret = syncop_lookup (FIRST_CHILD(this), &loc, NULL, NULL, NULL, NULL);
+        if (ret < 0) {
+                goto err;
+        }
+        /* Hand the looked-up inode to the dirent; extra ref is needed
+         * because loc_wipe() below unrefs loc.inode. */
+        entry->inode = inode_ref (loc.inode);
+        ret = 0;
+err:
+        loc_wipe (&loc);
+        return ret;
+}
+
+/* shard_delete_shards: synctask entry point for background shard deletion.
+ *
+ * Resolves .shard and .shard/.remove_me, then iterates the marker entries
+ * under .remove_me via anonymous-fd readdirp; each marker's name is the
+ * gfid of a deleted/renamed-over base file whose shards are cleaned up by
+ * shard_delete_shards_of_entry(). @opaque is the cleanup call frame; its
+ * ->local is populated here and owned by the frame from then on.
+ *
+ * Returns 0 (failures for individual entries are logged and skipped).
+ */
+int
+shard_delete_shards (void *opaque)
+{
+        int              ret                        = 0;
+        off_t            offset                     = 0;
+        loc_t            loc                        = {0,};
+        inode_t         *link_inode                 = NULL;
+        xlator_t        *this                       = NULL;
+        shard_priv_t    *priv                       = NULL;
+        shard_local_t   *local                      = NULL;
+        gf_dirent_t      entries;
+        gf_dirent_t     *entry                      = NULL;
+        call_frame_t    *cleanup_frame              = NULL;
+
+        this = THIS;
+        priv = this->private;
+        INIT_LIST_HEAD (&entries.list);
+
+        cleanup_frame = opaque;
+
+        local = mem_get0 (this->local_pool);
+        if (!local) {
+                gf_msg (this->name, GF_LOG_WARNING, ENOMEM,
+                        SHARD_MSG_MEMALLOC_FAILED, "Failed to create local to "
+                        "delete shards");
+                ret = -ENOMEM;
+                goto err;
+        }
+        /* Attach local to the frame so STACK_DESTROY of cleanup_frame
+         * releases it (and the fd/dict refs taken below). */
+        cleanup_frame->local = local;
+
+        local->xattr_req = dict_new ();
+        if (!local->xattr_req) {
+                ret = -ENOMEM;
+                goto err;
+        }
+        /* Snapshot the rate so a concurrent reconfigure() doesn't change
+         * batching mid-run. */
+        local->deletion_rate = priv->deletion_rate;
+
+        ret = shard_resolve_internal_dir (this, local,
+                                          SHARD_INTERNAL_DIR_DOT_SHARD);
+        if (ret == -ENOENT) {
+                gf_msg_debug (this->name, 0, ".shard absent. Nothing to"
+                              " delete. Exiting");
+                ret = 0;
+                goto err;
+        } else if (ret < 0) {
+                goto err;
+        }
+
+        ret = shard_resolve_internal_dir (this, local,
+                                          SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME);
+        if (ret == -ENOENT) {
+                gf_msg_debug (this->name, 0, ".remove_me absent. "
+                              "Nothing to delete. Exiting");
+                ret = 0;
+                goto err;
+        } else if (ret < 0) {
+                goto err;
+        }
+
+        local->fd = fd_anonymous (local->dot_shard_rm_loc.inode);
+        if (!local->fd) {
+                ret = -ENOMEM;
+                goto err;
+        }
+
+        /* NOTE(review): local->xattr_req still carries the "gfid-req" key
+         * set during dir resolution when passed to readdirp here —
+         * presumably harmless, but verify against the posix xlator. */
+        while ((ret = syncop_readdirp (FIRST_CHILD(this), local->fd, 131072,
+                                      offset, &entries, local->xattr_req,
+                                      NULL))) {
+                if (ret > 0)
+                        ret = 0;
+                list_for_each_entry (entry, &entries.list, list) {
+                        offset = entry->d_off;
+
+                        if (!strcmp (entry->d_name, ".") ||
+                            !strcmp (entry->d_name, ".."))
+                                continue;
+
+                        /* readdirp may return dirents without inodes (e.g.
+                         * on first access); look the marker up explicitly. */
+                        if (!entry->inode) {
+                                ret = shard_lookup_marker_entry (this, local,
+                                                                 entry);
+                                if (ret < 0)
+                                        continue;
+                        }
+                        /* NOTE(review): inode_link() can return NULL; the
+                         * code below passes link_inode on unchecked —
+                         * confirm the failure mode is acceptable here. */
+                        link_inode = inode_link (entry->inode, local->fd->inode,
+                                                 entry->d_name, &entry->d_stat);
+
+                        gf_msg_debug (this->name, 0, "Initiating deletion of "
+                                      "shards of gfid %s", entry->d_name);
+                        ret = shard_delete_shards_of_entry (cleanup_frame, this,
+                                                            entry, link_inode);
+                        /* Drop the marker from the inode table whether or not
+                         * deletion succeeded; a failure is retried on the
+                         * next run of the task. */
+                        inode_unlink (link_inode, local->fd->inode,
+                                      entry->d_name);
+                        inode_unref (link_inode);
+                        if (ret) {
+                                gf_msg (this->name, GF_LOG_ERROR, -ret,
+                                        SHARD_MSG_SHARDS_DELETION_FAILED,
+                                        "Failed to clean up shards of gfid %s",
+                                        entry->d_name);
+                                continue;
+                        }
+                        gf_msg (this->name, GF_LOG_INFO, 0,
+                                SHARD_MSG_SHARDS_DELETION_COMPLETED, "Deleted "
+                                "shards of gfid=%s from backend",
+                                entry->d_name);
+                }
+                gf_dirent_free (&entries);
+                if (ret)
+                        break;
+        }
+        ret = 0;
+err:
+        loc_wipe (&loc);
+        return ret;
+}
+
+int
 shard_unlock_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
                           int32_t op_ret, int32_t op_errno, dict_t *xdata)
 {
@@ -3394,7 +3900,10 @@ shard_unlink_base_file_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
                 local->postoldparent = *postparent;
                 if (xdata)
                         local->xattr_rsp = dict_ref (xdata);
+                if (local->cleanup_required)
+                        shard_start_background_deletion (this);
         }
+
         if (local->entrylk_frame) {
                 ret = shard_unlock_entrylk (frame, this);
                 if (ret < 0) {
@@ -3408,6 +3917,7 @@ shard_unlink_base_file_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
                 local->op_ret = -1;
                 local->op_errno = -ret;
         }
+
         shard_unlink_cbk (frame, this);
         return 0;
 }
@@ -3576,6 +4086,7 @@ shard_post_lookup_base_shard_rm_handler (call_frame_t *frame, xlator_t *this)
         } else {
                 gf_msg_debug (this->name, 0, "link count on %s = 1, creating "
                               "file under .remove_me", local->int_inodelk.loc.path);
+                local->cleanup_required = _gf_true;
                 shard_acquire_entrylk (frame, this, priv->dot_shard_rm_inode,
                                        local->prebuf.ia_gfid);
         }
@@ -3788,20 +4299,6 @@ err:
 }
 
 int
-shard_rename_cbk (call_frame_t *frame, xlator_t *this)
-{
-        shard_local_t *local = NULL;
-
-        local = frame->local;
-
-        SHARD_STACK_UNWIND (rename, frame, local->op_ret, local->op_errno,
-                            &local->prebuf, &local->preoldparent,
-                            &local->postoldparent, &local->prenewparent,
-                            &local->postnewparent, local->xattr_rsp);
-        return 0;
-}
-
-int
 shard_post_rename_lookup_handler (call_frame_t *frame, xlator_t *this)
 {
         shard_rename_cbk (frame, this);
@@ -3854,6 +4351,8 @@ shard_rename_src_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
                         local->op_errno = -ret;
                         goto err;
                 }
+                if (local->cleanup_required)
+                        shard_start_background_deletion (this);
         }
 
         /* Now the base file of src, if sharded, is looked up to gather ia_size
@@ -4822,7 +5321,7 @@ shard_common_inode_write_do (call_frame_t *frame, xlator_t *this)
 
         if (dict_set_uint32 (local->xattr_req,
                              GLUSTERFS_WRITE_UPDATE_ATOMIC, 4)) {
-                gf_msg (this->name, GF_LOG_ERROR, 0, SHARD_MSG_DICT_SET_FAILED,
+                gf_msg (this->name, GF_LOG_ERROR, 0, SHARD_MSG_DICT_OP_FAILED,
                         "Failed to set "GLUSTERFS_WRITE_UPDATE_ATOMIC" into "
                         "dict: %s", uuid_utoa (fd->inode->gfid));
                 local->op_ret = -1;
@@ -5141,7 +5640,7 @@ shard_mkdir_internal_dir (call_frame_t *frame, xlator_t *this,
 
         ret = dict_set_bin (xattr_req, "gfid-req", *gfid, 16);
         if (ret) {
-                gf_msg (this->name, GF_LOG_ERROR, 0, SHARD_MSG_DICT_SET_FAILED,
+                gf_msg (this->name, GF_LOG_ERROR, 0, SHARD_MSG_DICT_OP_FAILED,
                         "Failed to set gfid-req for %s",
                         shard_internal_dir_string (type));
                 goto err;
@@ -6186,6 +6685,8 @@ init (xlator_t *this)
 
         GF_OPTION_INIT ("shard-block-size", priv->block_size, size_uint64, out);
 
+        GF_OPTION_INIT ("shard-deletion-rate", priv->deletion_rate, uint32, out);
+
         this->local_pool = mem_pool_new (shard_local_t, 128);
         if (!this->local_pool) {
                 ret = -1;
@@ -6241,6 +6742,8 @@ reconfigure (xlator_t *this, dict_t *options)
         GF_OPTION_RECONF ("shard-block-size", priv->block_size, options, size,
                           out);
 
+        GF_OPTION_RECONF ("shard-deletion-rate", priv->deletion_rate, options,
+                          uint32, out);
         ret = 0;
 
 out:
@@ -6364,5 +6867,12 @@ struct volume_options options[] = {
            .description = "The size unit used to break a file into multiple "
                           "chunks",
         },
+        {  .key = {"shard-deletion-rate"},
+           .type = GF_OPTION_TYPE_INT,
+           .default_value = "100",
+           .min = 100,
+           .max = INT_MAX,
+           .description = "The number of shards to send deletes on at a time",
+        },
         { .key = {NULL} },
 };
diff --git a/xlators/features/shard/src/shard.h b/xlators/features/shard/src/shard.h
index 1783ff6..5de098a 100644
--- a/xlators/features/shard/src/shard.h
+++ b/xlators/features/shard/src/shard.h
@@ -130,9 +130,9 @@ shard_unlock_entrylk (call_frame_t *frame, xlator_t *this);
                               sizeof (*__bs));                                \
         if (__ret) {                                                          \
                 gf_msg (this->name, GF_LOG_WARNING, 0,                        \
-                        SHARD_MSG_DICT_SET_FAILED, "Failed to set key: %s "   \
+                        SHARD_MSG_DICT_OP_FAILED, "Failed to set key: %s "    \
                         "on path %s", GF_XATTR_SHARD_BLOCK_SIZE, (loc)->path);\
-                GF_FREE (__bs);                                               \
+                GF_FREE (__bs);                                               \
                 goto label;                                                   \
         }                                                                     \
                                                                               \
@@ -144,7 +144,7 @@ shard_unlock_entrylk (call_frame_t *frame, xlator_t *this);
                               __size_attr, 8 * 4);                            \
         if (__ret) {                                                          \
                 gf_msg (this->name, GF_LOG_WARNING, 0,                        \
-                        SHARD_MSG_DICT_SET_FAILED, "Failed to set key: %s "   \
+                        SHARD_MSG_DICT_OP_FAILED, "Failed to set key: %s "    \
                         "on path %s", GF_XATTR_SHARD_FILE_SIZE, (loc)->path);   \
                 GF_FREE (__size_attr);                                        \
                 goto label;                                                   \
@@ -160,7 +160,7 @@ shard_unlock_entrylk (call_frame_t *frame, xlator_t *this);
                 local->op_ret = -1;                                           \
                 local->op_errno = ENOMEM;                                     \
                 gf_msg (this->name, GF_LOG_WARNING, 0,                        \
-                        SHARD_MSG_DICT_SET_FAILED, "Failed to set dict value:"\
+                        SHARD_MSG_DICT_OP_FAILED, "Failed to set dict value:"\
                         " key:%s for %s.", GF_XATTR_SHARD_FILE_SIZE,          \
                         uuid_utoa (gfid));                                    \
                 goto label;                                                   \
@@ -197,6 +197,12 @@ shard_unlock_entrylk (call_frame_t *frame, xlator_t *this);
                 }                                                             \
         } while (0)
 
+typedef enum {
+        SHARD_FIRST_LOOKUP_PENDING = 0,
+        SHARD_FIRST_LOOKUP_IN_PROGRESS,
+        SHARD_FIRST_LOOKUP_DONE,
+} shard_first_lookup_state_t;
+
 /* rm = "remove me" */
 
 typedef struct shard_priv {
@@ -208,6 +214,8 @@ typedef struct shard_priv {
         gf_lock_t lock;
         int inode_count;
         struct list_head ilist_head;
+        uint32_t deletion_rate;
+        shard_first_lookup_state_t first_lookup;
 } shard_priv_t;
 
 typedef struct {
@@ -303,6 +311,9 @@ typedef struct shard_local {
         call_frame_t *main_frame;
         call_frame_t *inodelk_frame;
         call_frame_t *entrylk_frame;
+        uint32_t deletion_rate;
+        gf_boolean_t cleanup_required;
+        uuid_t base_gfid;
 } shard_local_t;
 
 typedef struct shard_inode_ctx {
diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
index 5a697cf..4357562 100644
--- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c
+++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
@@ -3298,6 +3298,11 @@ struct volopt_map_entry glusterd_volopt_map[] = {
           .op_version = GD_OP_VERSION_3_7_0,
           .flags      = OPT_FLAG_CLIENT_OPT
         },
+        { .key        = "features.shard-deletion-rate",
+          .voltype    = "features/shard",
+          .op_version = GD_OP_VERSION_4_2_0,
+          .flags      = OPT_FLAG_CLIENT_OPT
+        },
         { .key        = "features.scrub-throttle",
           .voltype    = "features/bit-rot",
           .value      = "lazy",
-- 
1.8.3.1