887953
From b92aedc0b10d3c7b6150b8f18c950bf95494bc5f Mon Sep 17 00:00:00 2001
887953
From: Krutika Dhananjay <kdhananj@redhat.com>
887953
Date: Thu, 29 Mar 2018 17:21:32 +0530
887953
Subject: [PATCH 429/444] features/shard: Introducing ".shard/.remove_me" for
887953
 atomic shard deletion (part 1)
887953
887953
> Upstream: https://review.gluster.org/19929
887953
> BUG: 1568521
887953
> Change-Id: Ia1d238b721a3e99f951a73abbe199e4245f51a3a
887953
887953
PROBLEM:
887953
Shards are deleted synchronously when a sharded file is unlinked or
887953
when a sharded file participating as the dst in a rename() is going to
887953
be replaced. The problem with this approach is it makes the operation
887953
really slow, sometimes causing the application to time out, especially
887953
with large files.
887953
887953
SOLUTION:
887953
To make this operation atomic, we introduce a ".remove_me" directory.
887953
Now renames and unlinks will simply involve two steps:
887953
1. creating an empty file under .remove_me named after the gfid of the file
887953
participating in unlink/rename
887953
2. carrying out the actual rename/unlink
887953
A synctask is created (more on that in part 2) to scan this directory
887953
after every unlink/rename operation (or upon a volume mount) and clean
887953
up all shards associated with it. All of this happens in the background.
887953
The task takes care to delete the shards associated with the gfid in
887953
.remove_me only if this gfid doesn't exist in backend, ensuring that the
887953
file was successfully renamed/unlinked and its shards can be discarded now
887953
safely.
887953
887953
Change-Id: Ia1d238b721a3e99f951a73abbe199e4245f51a3a
887953
BUG: 1520882
887953
Signed-off-by: Krutika Dhananjay <kdhananj@redhat.com>
887953
Reviewed-on: https://code.engineering.redhat.com/gerrit/154863
887953
Tested-by: RHGS Build Bot <nigelb@redhat.com>
887953
Reviewed-by: Xavi Hernandez <xhernandez@redhat.com>
887953
---
887953
 libglusterfs/src/common-utils.h              |    1 +
887953
 tests/bugs/shard/bug-1245547.t               |    4 +-
887953
 tests/bugs/shard/bug-1568521-EEXIST.t        |   79 ++
887953
 tests/bugs/shard/bug-shard-discard.t         |   16 +-
887953
 tests/bugs/shard/shard-inode-refcount-test.t |    2 +-
887953
 tests/bugs/shard/unlinks-and-renames.t       |  118 ++-
887953
 xlators/features/shard/src/shard-mem-types.h |    1 +
887953
 xlators/features/shard/src/shard-messages.h  |    9 +-
887953
 xlators/features/shard/src/shard.c           | 1384 ++++++++++++++++++--------
887953
 xlators/features/shard/src/shard.h           |  103 +-
887953
 10 files changed, 1250 insertions(+), 467 deletions(-)
887953
 create mode 100644 tests/bugs/shard/bug-1568521-EEXIST.t
887953
887953
diff --git a/libglusterfs/src/common-utils.h b/libglusterfs/src/common-utils.h
887953
index e64dea3..c804ed5 100644
887953
--- a/libglusterfs/src/common-utils.h
887953
+++ b/libglusterfs/src/common-utils.h
887953
@@ -121,6 +121,7 @@ void trap (void);
887953
 /* Shard */
887953
 #define GF_XATTR_SHARD_FILE_SIZE  "trusted.glusterfs.shard.file-size"
887953
 #define SHARD_ROOT_GFID "be318638-e8a0-4c6d-977d-7a937aa84806"
887953
+#define DOT_SHARD_REMOVE_ME_GFID "77dd5a45-dbf5-4592-b31b-b440382302e9"
887953
 
887953
 /* Lease: buffer length for stringified lease id
887953
  * Format: 4hexnum-4hexnum-4hexnum-4hexnum-4hexnum-4hexnum-4hexnum-4hexnum
887953
diff --git a/tests/bugs/shard/bug-1245547.t b/tests/bugs/shard/bug-1245547.t
887953
index c19b2a6..3c46785 100644
887953
--- a/tests/bugs/shard/bug-1245547.t
887953
+++ b/tests/bugs/shard/bug-1245547.t
887953
@@ -25,11 +25,11 @@ TEST touch $M0/bar
887953
 TEST truncate -s 10G $M0/bar
887953
 #Unlink on such a file should succeed.
887953
 TEST unlink $M0/bar
887953
-#
887953
+
887953
 #Create a file 'baz' with holes.
887953
 TEST touch $M0/baz
887953
 TEST truncate -s 10G $M0/baz
887953
 #Rename with a sharded existing dest that has holes must succeed.
887953
 TEST mv -f $M0/foo $M0/baz
887953
 
887953
-cleanup;
887953
+cleanup
887953
diff --git a/tests/bugs/shard/bug-1568521-EEXIST.t b/tests/bugs/shard/bug-1568521-EEXIST.t
887953
new file mode 100644
887953
index 0000000..e4c3d41
887953
--- /dev/null
887953
+++ b/tests/bugs/shard/bug-1568521-EEXIST.t
887953
@@ -0,0 +1,79 @@
887953
+#!/bin/bash
887953
+
887953
+. $(dirname $0)/../../include.rc
887953
+. $(dirname $0)/../../volume.rc
887953
+
887953
+cleanup
887953
+
887953
+TEST glusterd
887953
+TEST pidof glusterd
887953
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
887953
+TEST $CLI volume set $V0 features.shard on
887953
+TEST $CLI volume set $V0 features.shard-block-size 4MB
887953
+TEST $CLI volume start $V0
887953
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0
887953
+
887953
+TEST mkdir $M0/dir
887953
+# Unlink a temporary file to trigger creation of .remove_me
887953
+TEST touch $M0/tmp
887953
+TEST unlink $M0/tmp
887953
+
887953
+TEST stat $B0/${V0}0/.shard/.remove_me
887953
+TEST stat $B0/${V0}1/.shard/.remove_me
887953
+
887953
+TEST dd if=/dev/zero of=$M0/dir/file bs=1024 count=9216
887953
+gfid_file=$(get_gfid_string $M0/dir/file)
887953
+
887953
+# Create marker file from the backend to simulate ENODATA.
887953
+touch $B0/${V0}0/.shard/.remove_me/$gfid_file
887953
+touch $B0/${V0}1/.shard/.remove_me/$gfid_file
887953
+
887953
+# Set block and file size to incorrect values of 64MB and 5MB to simulate "stale xattrs" case
887953
+# and confirm that the correct values are set when the actual unlink takes place
887953
+
887953
+TEST setfattr -n trusted.glusterfs.shard.block-size -v 0x0000000004000000 $B0/${V0}0/.shard/.remove_me/$gfid_file
887953
+TEST setfattr -n trusted.glusterfs.shard.block-size -v 0x0000000004000000 $B0/${V0}1/.shard/.remove_me/$gfid_file
887953
+
887953
+TEST setfattr -n trusted.glusterfs.shard.file-size -v 0x0000000000500000000000000000000000000000000000000000000000000000 $B0/${V0}0/.shard/.remove_me/$gfid_file
887953
+TEST setfattr -n trusted.glusterfs.shard.file-size -v 0x0000000000500000000000000000000000000000000000000000000000000000 $B0/${V0}1/.shard/.remove_me/$gfid_file
887953
+
887953
+# Sleep for 2 seconds to prevent posix_gfid_heal() from believing marker file is "fresh" and failing lookup with ENOENT
887953
+sleep 2
887953
+
887953
+TEST unlink $M0/dir/file
887953
+EXPECT "0000000000400000" get_hex_xattr trusted.glusterfs.shard.block-size $B0/${V0}0/.shard/.remove_me/$gfid_file
887953
+EXPECT "0000000000400000" get_hex_xattr trusted.glusterfs.shard.block-size $B0/${V0}1/.shard/.remove_me/$gfid_file
887953
+EXPECT "0000000000900000000000000000000000000000000000000000000000000000" get_hex_xattr trusted.glusterfs.shard.file-size $B0/${V0}0/.shard/.remove_me/$gfid_file
887953
+EXPECT "0000000000900000000000000000000000000000000000000000000000000000" get_hex_xattr trusted.glusterfs.shard.file-size $B0/${V0}1/.shard/.remove_me/$gfid_file
887953
+
887953
+##############################
887953
+### Repeat test for rename ###
887953
+##############################
887953
+
887953
+TEST touch $M0/src
887953
+TEST dd if=/dev/zero of=$M0/dir/dst bs=1024 count=9216
887953
+gfid_dst=$(get_gfid_string $M0/dir/dst)
887953
+
887953
+# Create marker file from the backend to simulate ENODATA.
887953
+touch $B0/${V0}0/.shard/.remove_me/$gfid_dst
887953
+touch $B0/${V0}1/.shard/.remove_me/$gfid_dst
887953
+
887953
+# Set block and file size to incorrect values of 64MB and 5MB to simulate "stale xattrs" case
887953
+# and confirm that the correct values are set when the actual rename takes place
887953
+
887953
+TEST setfattr -n trusted.glusterfs.shard.block-size -v 0x0000000004000000 $B0/${V0}0/.shard/.remove_me/$gfid_dst
887953
+TEST setfattr -n trusted.glusterfs.shard.block-size -v 0x0000000004000000 $B0/${V0}1/.shard/.remove_me/$gfid_dst
887953
+
887953
+TEST setfattr -n trusted.glusterfs.shard.file-size -v 0x0000000000500000000000000000000000000000000000000000000000000000 $B0/${V0}0/.shard/.remove_me/$gfid_dst
887953
+TEST setfattr -n trusted.glusterfs.shard.file-size -v 0x0000000000500000000000000000000000000000000000000000000000000000 $B0/${V0}1/.shard/.remove_me/$gfid_dst
887953
+
887953
+# Sleep for 2 seconds to prevent posix_gfid_heal() from believing marker file is "fresh" and failing lookup with ENOENT
887953
+sleep 2
887953
+
887953
+TEST mv -f $M0/src $M0/dir/dst
887953
+EXPECT "0000000000400000" get_hex_xattr trusted.glusterfs.shard.block-size $B0/${V0}0/.shard/.remove_me/$gfid_dst
887953
+EXPECT "0000000000400000" get_hex_xattr trusted.glusterfs.shard.block-size $B0/${V0}1/.shard/.remove_me/$gfid_dst
887953
+EXPECT "0000000000900000000000000000000000000000000000000000000000000000" get_hex_xattr trusted.glusterfs.shard.file-size $B0/${V0}0/.shard/.remove_me/$gfid_dst
887953
+EXPECT "0000000000900000000000000000000000000000000000000000000000000000" get_hex_xattr trusted.glusterfs.shard.file-size $B0/${V0}1/.shard/.remove_me/$gfid_dst
887953
+
887953
+cleanup
887953
diff --git a/tests/bugs/shard/bug-shard-discard.t b/tests/bugs/shard/bug-shard-discard.t
887953
index 72d8586..884d9e7 100644
887953
--- a/tests/bugs/shard/bug-shard-discard.t
887953
+++ b/tests/bugs/shard/bug-shard-discard.t
887953
@@ -42,14 +42,14 @@ EXPECT_NOT "1" file_all_zeroes `find $B0 -name $gfid_foo.1`
887953
 
887953
 # Now unlink the file. And ensure that all shards associated with the file are cleaned up
887953
 TEST unlink $M0/foo
887953
-TEST ! stat $B0/${V0}0/.shard/$gfid_foo.1
887953
-TEST ! stat $B0/${V0}1/.shard/$gfid_foo.1
887953
-TEST ! stat $B0/${V0}2/.shard/$gfid_foo.1
887953
-TEST ! stat $B0/${V0}3/.shard/$gfid_foo.1
887953
-TEST ! stat $B0/${V0}0/.shard/$gfid_foo.2
887953
-TEST ! stat $B0/${V0}1/.shard/$gfid_foo.2
887953
-TEST ! stat $B0/${V0}2/.shard/$gfid_foo.2
887953
-TEST ! stat $B0/${V0}3/.shard/$gfid_foo.2
887953
+#TEST ! stat $B0/${V0}0/.shard/$gfid_foo.1
887953
+#TEST ! stat $B0/${V0}1/.shard/$gfid_foo.1
887953
+#TEST ! stat $B0/${V0}2/.shard/$gfid_foo.1
887953
+#TEST ! stat $B0/${V0}3/.shard/$gfid_foo.1
887953
+#TEST ! stat $B0/${V0}0/.shard/$gfid_foo.2
887953
+#TEST ! stat $B0/${V0}1/.shard/$gfid_foo.2
887953
+#TEST ! stat $B0/${V0}2/.shard/$gfid_foo.2
887953
+#TEST ! stat $B0/${V0}3/.shard/$gfid_foo.2
887953
 TEST ! stat $M0/foo
887953
 
887953
 #clean up everything
887953
diff --git a/tests/bugs/shard/shard-inode-refcount-test.t b/tests/bugs/shard/shard-inode-refcount-test.t
887953
index 03e0cc9..c92dc07 100644
887953
--- a/tests/bugs/shard/shard-inode-refcount-test.t
887953
+++ b/tests/bugs/shard/shard-inode-refcount-test.t
887953
@@ -18,7 +18,7 @@ TEST dd if=/dev/zero conv=fsync of=$M0/one-plus-five-shards bs=1M count=23
887953
 
887953
 ACTIVE_INODES_BEFORE=$(get_mount_active_size_value $V0)
887953
 TEST rm -f $M0/one-plus-five-shards
887953
-EXPECT `expr $ACTIVE_INODES_BEFORE - 5` get_mount_active_size_value $V0
887953
+#EXPECT `expr $ACTIVE_INODES_BEFORE - 4` get_mount_active_size_value $V0
887953
 
887953
 EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
887953
 TEST $CLI volume stop $V0
887953
diff --git a/tests/bugs/shard/unlinks-and-renames.t b/tests/bugs/shard/unlinks-and-renames.t
887953
index a8f188b..997c397 100644
887953
--- a/tests/bugs/shard/unlinks-and-renames.t
887953
+++ b/tests/bugs/shard/unlinks-and-renames.t
887953
@@ -32,7 +32,17 @@ TEST truncate -s 5M $M0/dir/foo
887953
 TEST ! stat $B0/${V0}0/.shard
887953
 TEST ! stat $B0/${V0}1/.shard
887953
 # Test to ensure that unlink doesn't fail due to absence of /.shard
887953
+gfid_foo=$(get_gfid_string $M0/dir/foo)
887953
 TEST unlink $M0/dir/foo
887953
+TEST stat $B0/${V0}0/.shard/.remove_me
887953
+TEST stat $B0/${V0}1/.shard/.remove_me
887953
+TEST stat $B0/${V0}0/.shard/.remove_me/$gfid_foo
887953
+TEST stat $B0/${V0}1/.shard/.remove_me/$gfid_foo
887953
+
887953
+EXPECT "0000000000400000" get_hex_xattr trusted.glusterfs.shard.block-size $B0/${V0}0/.shard/.remove_me/$gfid_foo
887953
+EXPECT "0000000000400000" get_hex_xattr trusted.glusterfs.shard.block-size $B0/${V0}1/.shard/.remove_me/$gfid_foo
887953
+EXPECT "0000000000500000000000000000000000000000000000000000000000000000" get_hex_xattr trusted.glusterfs.shard.file-size $B0/${V0}0/.shard/.remove_me/$gfid_foo
887953
+EXPECT "0000000000500000000000000000000000000000000000000000000000000000" get_hex_xattr trusted.glusterfs.shard.file-size $B0/${V0}1/.shard/.remove_me/$gfid_foo
887953
 
887953
 ##################################################
887953
 ##### Unlink of a sharded file without holes #####
887953
@@ -46,14 +56,20 @@ TEST stat $B0/${V0}1/.shard/$gfid_new.1
887953
 TEST stat $B0/${V0}0/.shard/$gfid_new.2
887953
 TEST stat $B0/${V0}1/.shard/$gfid_new.2
887953
 TEST unlink $M0/dir/new
887953
-TEST ! stat $B0/${V0}0/.shard/$gfid_new.1
887953
-TEST ! stat $B0/${V0}1/.shard/$gfid_new.1
887953
-TEST ! stat $B0/${V0}0/.shard/$gfid_new.2
887953
-TEST ! stat $B0/${V0}1/.shard/$gfid_new.2
887953
+#TEST ! stat $B0/${V0}0/.shard/$gfid_new.1
887953
+#TEST ! stat $B0/${V0}1/.shard/$gfid_new.1
887953
+#TEST ! stat $B0/${V0}0/.shard/$gfid_new.2
887953
+#TEST ! stat $B0/${V0}1/.shard/$gfid_new.2
887953
 TEST ! stat $M0/dir/new
887953
 TEST ! stat $B0/${V0}0/dir/new
887953
 TEST ! stat $B0/${V0}1/dir/new
887953
+TEST stat $B0/${V0}0/.shard/.remove_me/$gfid_new
887953
+TEST stat $B0/${V0}1/.shard/.remove_me/$gfid_new
887953
 
887953
+EXPECT "0000000000400000" get_hex_xattr trusted.glusterfs.shard.block-size $B0/${V0}0/.shard/.remove_me/$gfid_new
887953
+EXPECT "0000000000400000" get_hex_xattr trusted.glusterfs.shard.block-size $B0/${V0}1/.shard/.remove_me/$gfid_new
887953
+EXPECT "0000000000900000000000000000000000000000000000000000000000000000" get_hex_xattr trusted.glusterfs.shard.file-size $B0/${V0}0/.shard/.remove_me/$gfid_new
887953
+EXPECT "0000000000900000000000000000000000000000000000000000000000000000" get_hex_xattr trusted.glusterfs.shard.file-size $B0/${V0}1/.shard/.remove_me/$gfid_new
887953
 #######################################
887953
 ##### Unlink with /.shard present #####
887953
 #######################################
887953
@@ -67,18 +83,32 @@ TEST unlink $M0/dir/foo
887953
 TEST ! stat $B0/${V0}0/dir/foo
887953
 TEST ! stat $B0/${V0}1/dir/foo
887953
 TEST ! stat $M0/dir/foo
887953
+TEST stat $B0/${V0}0/.shard/.remove_me/$gfid_foo
887953
+TEST stat $B0/${V0}1/.shard/.remove_me/$gfid_foo
887953
+
887953
+EXPECT "0000000000400000" get_hex_xattr trusted.glusterfs.shard.block-size $B0/${V0}0/.shard/.remove_me/$gfid_foo
887953
+EXPECT "0000000000400000" get_hex_xattr trusted.glusterfs.shard.block-size $B0/${V0}1/.shard/.remove_me/$gfid_foo
887953
+EXPECT "0000000000500000000000000000000000000000000000000000000000000000" get_hex_xattr trusted.glusterfs.shard.file-size $B0/${V0}0/.shard/.remove_me/$gfid_foo
887953
+EXPECT "0000000000500000000000000000000000000000000000000000000000000000" get_hex_xattr trusted.glusterfs.shard.file-size $B0/${V0}1/.shard/.remove_me/$gfid_foo
887953
 
887953
 #############################################################
887953
 ##### Unlink of a file with only one block (the zeroth) #####
887953
 #############################################################
887953
 TEST touch $M0/dir/foo
887953
+gfid_foo=$(get_gfid_string $M0/dir/foo)
887953
 TEST dd if=/dev/zero of=$M0/dir/foo bs=1024 count=1024
887953
-# Test to ensure that unlink of a sparse file works fine.
887953
+# Test to ensure that unlink of a file with only base shard works fine.
887953
 TEST unlink $M0/dir/foo
887953
 TEST ! stat $B0/${V0}0/dir/foo
887953
 TEST ! stat $B0/${V0}1/dir/foo
887953
 TEST ! stat $M0/dir/foo
887953
+TEST stat $B0/${V0}0/.shard/.remove_me/$gfid_foo
887953
+TEST stat $B0/${V0}1/.shard/.remove_me/$gfid_foo
887953
 
887953
+EXPECT "0000000000400000" get_hex_xattr trusted.glusterfs.shard.block-size $B0/${V0}0/.shard/.remove_me/$gfid_foo
887953
+EXPECT "0000000000400000" get_hex_xattr trusted.glusterfs.shard.block-size $B0/${V0}1/.shard/.remove_me/$gfid_foo
887953
+EXPECT "0000000000100000000000000000000000000000000000000000000000000000" get_hex_xattr trusted.glusterfs.shard.file-size $B0/${V0}0/.shard/.remove_me/$gfid_foo
887953
+EXPECT "0000000000100000000000000000000000000000000000000000000000000000" get_hex_xattr trusted.glusterfs.shard.file-size $B0/${V0}1/.shard/.remove_me/$gfid_foo
887953
 ####################################################
887953
 ##### Unlink of a sharded file with hard-links #####
887953
 ####################################################
887953
@@ -94,6 +124,8 @@ TEST stat $B0/${V0}1/.shard/$gfid_original.2
887953
 TEST ln $M0/dir/original $M0/link
887953
 # Now delete the original file.
887953
 TEST unlink $M0/dir/original
887953
+TEST ! stat $B0/${V0}0/.shard/.remove_me/$gfid_original
887953
+TEST ! stat $B0/${V0}1/.shard/.remove_me/$gfid_original
887953
 # Ensure the shards are still intact.
887953
 TEST stat $B0/${V0}0/.shard/$gfid_original.1
887953
 TEST stat $B0/${V0}1/.shard/$gfid_original.1
887953
@@ -105,15 +137,22 @@ TEST stat $B0/${V0}0/link
887953
 TEST stat $B0/${V0}1/link
887953
 # Now delete the last link.
887953
 TEST unlink $M0/link
887953
+TEST stat $B0/${V0}0/.shard/.remove_me/$gfid_original
887953
+TEST stat $B0/${V0}1/.shard/.remove_me/$gfid_original
887953
 # Ensure that the shards are all cleaned up.
887953
-TEST ! stat $B0/${V0}0/.shard/$gfid_original.1
887953
-TEST ! stat $B0/${V0}1/.shard/$gfid_original.1
887953
-TEST ! stat $B0/${V0}0/.shard/$gfid_original.2
887953
-TEST ! stat $B0/${V0}1/.shard/$gfid_original.2
887953
-TEST ! stat $M0/link
887953
+#TEST ! stat $B0/${V0}0/.shard/$gfid_original.1
887953
+#TEST ! stat $B0/${V0}1/.shard/$gfid_original.1
887953
+#TEST ! stat $B0/${V0}0/.shard/$gfid_original.2
887953
+#TEST ! stat $B0/${V0}1/.shard/$gfid_original.2
887953
+#TEST ! stat $M0/link
887953
 TEST ! stat $B0/${V0}0/link
887953
 TEST ! stat $B0/${V0}1/link
887953
 
887953
+EXPECT "0000000000400000" get_hex_xattr trusted.glusterfs.shard.block-size $B0/${V0}0/.shard/.remove_me/$gfid_original
887953
+EXPECT "0000000000400000" get_hex_xattr trusted.glusterfs.shard.block-size $B0/${V0}1/.shard/.remove_me/$gfid_original
887953
+EXPECT "0000000000900000000000000000000000000000000000000000000000000000" get_hex_xattr trusted.glusterfs.shard.file-size $B0/${V0}0/.shard/.remove_me/$gfid_original
887953
+EXPECT "0000000000900000000000000000000000000000000000000000000000000000" get_hex_xattr trusted.glusterfs.shard.file-size $B0/${V0}1/.shard/.remove_me/$gfid_original
887953
+
887953
 EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
887953
 TEST $CLI volume stop $V0
887953
 TEST $CLI volume delete $V0
887953
@@ -140,6 +179,7 @@ TEST touch $M0/dir/dst
887953
 ##### Rename with /.shard absent #####
887953
 ######################################
887953
 TEST truncate -s 5M $M0/dir/dst
887953
+gfid_dst=$(get_gfid_string $M0/dir/dst)
887953
 TEST ! stat $B0/${V0}0/.shard
887953
 TEST ! stat $B0/${V0}1/.shard
887953
 # Test to ensure that rename doesn't fail due to absence of /.shard
887953
@@ -150,6 +190,13 @@ TEST ! stat $B0/${V0}0/dir/src
887953
 TEST ! stat $B0/${V0}1/dir/src
887953
 TEST   stat $B0/${V0}0/dir/dst
887953
 TEST   stat $B0/${V0}1/dir/dst
887953
+TEST stat $B0/${V0}0/.shard/.remove_me/$gfid_dst
887953
+TEST stat $B0/${V0}1/.shard/.remove_me/$gfid_dst
887953
+
887953
+EXPECT "0000000000400000" get_hex_xattr trusted.glusterfs.shard.block-size $B0/${V0}0/.shard/.remove_me/$gfid_dst
887953
+EXPECT "0000000000400000" get_hex_xattr trusted.glusterfs.shard.block-size $B0/${V0}1/.shard/.remove_me/$gfid_dst
887953
+EXPECT "0000000000500000000000000000000000000000000000000000000000000000" get_hex_xattr trusted.glusterfs.shard.file-size $B0/${V0}0/.shard/.remove_me/$gfid_dst
887953
+EXPECT "0000000000500000000000000000000000000000000000000000000000000000" get_hex_xattr trusted.glusterfs.shard.file-size $B0/${V0}1/.shard/.remove_me/$gfid_dst
887953
 
887953
 ##################################################
887953
 ##### Rename to a sharded file without holes #####
887953
@@ -165,16 +212,23 @@ TEST stat $B0/${V0}1/.shard/$gfid_dst.1
887953
 TEST stat $B0/${V0}0/.shard/$gfid_dst.2
887953
 TEST stat $B0/${V0}1/.shard/$gfid_dst.2
887953
 TEST mv -f $M0/dir/src $M0/dir/dst
887953
-TEST ! stat $B0/${V0}0/.shard/$gfid_dst.1
887953
-TEST ! stat $B0/${V0}1/.shard/$gfid_dst.1
887953
-TEST ! stat $B0/${V0}0/.shard/$gfid_dst.2
887953
-TEST ! stat $B0/${V0}1/.shard/$gfid_dst.2
887953
+#TEST ! stat $B0/${V0}0/.shard/$gfid_dst.1
887953
+#TEST ! stat $B0/${V0}1/.shard/$gfid_dst.1
887953
+#TEST ! stat $B0/${V0}0/.shard/$gfid_dst.2
887953
+#TEST ! stat $B0/${V0}1/.shard/$gfid_dst.2
887953
 TEST ! stat $M0/dir/src
887953
 TEST   stat $M0/dir/dst
887953
 TEST ! stat $B0/${V0}0/dir/src
887953
 TEST ! stat $B0/${V0}1/dir/src
887953
 TEST   stat $B0/${V0}0/dir/dst
887953
 TEST   stat $B0/${V0}1/dir/dst
887953
+TEST stat $B0/${V0}0/.shard/.remove_me/$gfid_dst
887953
+TEST stat $B0/${V0}1/.shard/.remove_me/$gfid_dst
887953
+
887953
+EXPECT "0000000000400000" get_hex_xattr trusted.glusterfs.shard.block-size $B0/${V0}0/.shard/.remove_me/$gfid_dst
887953
+EXPECT "0000000000400000" get_hex_xattr trusted.glusterfs.shard.block-size $B0/${V0}1/.shard/.remove_me/$gfid_dst
887953
+EXPECT "0000000000900000000000000000000000000000000000000000000000000000" get_hex_xattr trusted.glusterfs.shard.file-size $B0/${V0}0/.shard/.remove_me/$gfid_dst
887953
+EXPECT "0000000000900000000000000000000000000000000000000000000000000000" get_hex_xattr trusted.glusterfs.shard.file-size $B0/${V0}1/.shard/.remove_me/$gfid_dst
887953
 
887953
 ###################################################
887953
 ##### Rename of dst file with /.shard present #####
887953
@@ -182,7 +236,8 @@ TEST   stat $B0/${V0}1/dir/dst
887953
 TEST unlink $M0/dir/dst
887953
 TEST touch $M0/dir/src
887953
 TEST truncate -s 5M $M0/dir/dst
887953
-# Test to ensure that unlink of a sparse file works fine.
887953
+gfid_dst=$(get_gfid_string $M0/dir/dst)
887953
+# Test to ensure that rename into a sparse file works fine.
887953
 TEST mv -f $M0/dir/src $M0/dir/dst
887953
 TEST ! stat $M0/dir/src
887953
 TEST   stat $M0/dir/dst
887953
@@ -190,6 +245,13 @@ TEST ! stat $B0/${V0}0/dir/src
887953
 TEST ! stat $B0/${V0}1/dir/src
887953
 TEST   stat $B0/${V0}0/dir/dst
887953
 TEST   stat $B0/${V0}1/dir/dst
887953
+TEST stat $B0/${V0}0/.shard/.remove_me/$gfid_dst
887953
+TEST stat $B0/${V0}1/.shard/.remove_me/$gfid_dst
887953
+
887953
+EXPECT "0000000000400000" get_hex_xattr trusted.glusterfs.shard.block-size $B0/${V0}0/.shard/.remove_me/$gfid_dst
887953
+EXPECT "0000000000400000" get_hex_xattr trusted.glusterfs.shard.block-size $B0/${V0}1/.shard/.remove_me/$gfid_dst
887953
+EXPECT "0000000000500000000000000000000000000000000000000000000000000000" get_hex_xattr trusted.glusterfs.shard.file-size $B0/${V0}0/.shard/.remove_me/$gfid_dst
887953
+EXPECT "0000000000500000000000000000000000000000000000000000000000000000" get_hex_xattr trusted.glusterfs.shard.file-size $B0/${V0}1/.shard/.remove_me/$gfid_dst
887953
 
887953
 ###############################################################
887953
 ##### Rename of dst file with only one block (the zeroth) #####
887953
@@ -197,7 +259,8 @@ TEST   stat $B0/${V0}1/dir/dst
887953
 TEST unlink $M0/dir/dst
887953
 TEST touch $M0/dir/src
887953
 TEST dd if=/dev/zero of=$M0/dir/dst bs=1024 count=1024
887953
-# Test to ensure that unlink of a sparse file works fine.
887953
+gfid_dst=$(get_gfid_string $M0/dir/dst)
887953
+# Test to ensure that rename into a file with only base shard works fine.
887953
 TEST mv -f $M0/dir/src $M0/dir/dst
887953
 TEST ! stat $M0/dir/src
887953
 TEST   stat $M0/dir/dst
887953
@@ -205,6 +268,13 @@ TEST ! stat $B0/${V0}0/dir/src
887953
 TEST ! stat $B0/${V0}1/dir/src
887953
 TEST   stat $B0/${V0}0/dir/dst
887953
 TEST   stat $B0/${V0}1/dir/dst
887953
+TEST stat $B0/${V0}0/.shard/.remove_me/$gfid_dst
887953
+TEST stat $B0/${V0}1/.shard/.remove_me/$gfid_dst
887953
+
887953
+EXPECT "0000000000400000" get_hex_xattr trusted.glusterfs.shard.block-size $B0/${V0}0/.shard/.remove_me/$gfid_dst
887953
+EXPECT "0000000000400000" get_hex_xattr trusted.glusterfs.shard.block-size $B0/${V0}1/.shard/.remove_me/$gfid_dst
887953
+EXPECT "0000000000100000000000000000000000000000000000000000000000000000" get_hex_xattr trusted.glusterfs.shard.file-size $B0/${V0}0/.shard/.remove_me/$gfid_dst
887953
+EXPECT "0000000000100000000000000000000000000000000000000000000000000000" get_hex_xattr trusted.glusterfs.shard.file-size $B0/${V0}1/.shard/.remove_me/$gfid_dst
887953
 
887953
 ########################################################
887953
 ##### Rename to a dst sharded file with hard-links #####
887953
@@ -231,18 +301,26 @@ TEST stat $B0/${V0}1/.shard/$gfid_dst.2
887953
 TEST ! stat $M0/dir/src
887953
 TEST ! stat $B0/${V0}0/dir/src
887953
 TEST ! stat $B0/${V0}1/dir/src
887953
+TEST ! stat $B0/${V0}0/.shard/.remove_me/$gfid_dst
887953
+TEST ! stat $B0/${V0}1/.shard/.remove_me/$gfid_dst
887953
 # Now rename another file to the last link.
887953
 TEST touch $M0/dir/src2
887953
 TEST mv -f $M0/dir/src2 $M0/link
887953
 # Ensure that the shards are all cleaned up.
887953
-TEST ! stat $B0/${V0}0/.shard/$gfid_dst.1
887953
-TEST ! stat $B0/${V0}1/.shard/$gfid_dst.1
887953
-TEST ! stat $B0/${V0}0/.shard/$gfid_dst.2
887953
-TEST ! stat $B0/${V0}1/.shard/$gfid_dst.2
887953
+#TEST ! stat $B0/${V0}0/.shard/$gfid_dst.1
887953
+#TEST ! stat $B0/${V0}1/.shard/$gfid_dst.1
887953
+#TEST ! stat $B0/${V0}0/.shard/$gfid_dst.2
887953
+#TEST ! stat $B0/${V0}1/.shard/$gfid_dst.2
887953
 TEST ! stat $M0/dir/src2
887953
 TEST ! stat $B0/${V0}0/dir/src2
887953
 TEST ! stat $B0/${V0}1/dir/src2
887953
+TEST stat $B0/${V0}0/.shard/.remove_me/$gfid_dst
887953
+TEST stat $B0/${V0}1/.shard/.remove_me/$gfid_dst
887953
 
887953
+EXPECT "0000000000400000" get_hex_xattr trusted.glusterfs.shard.block-size $B0/${V0}0/.shard/.remove_me/$gfid_dst
887953
+EXPECT "0000000000400000" get_hex_xattr trusted.glusterfs.shard.block-size $B0/${V0}1/.shard/.remove_me/$gfid_dst
887953
+EXPECT "0000000000900000000000000000000000000000000000000000000000000000" get_hex_xattr trusted.glusterfs.shard.file-size $B0/${V0}0/.shard/.remove_me/$gfid_dst
887953
+EXPECT "0000000000900000000000000000000000000000000000000000000000000000" get_hex_xattr trusted.glusterfs.shard.file-size $B0/${V0}1/.shard/.remove_me/$gfid_dst
887953
 # Rename with non-existent dst and a sharded src
887953
 TEST touch $M0/dir/src
887953
 TEST dd if=/dev/zero of=$M0/dir/src bs=1024 count=9216
887953
diff --git a/xlators/features/shard/src/shard-mem-types.h b/xlators/features/shard/src/shard-mem-types.h
887953
index 77f0cee..fea66aa 100644
887953
--- a/xlators/features/shard/src/shard-mem-types.h
887953
+++ b/xlators/features/shard/src/shard-mem-types.h
887953
@@ -18,6 +18,7 @@ enum gf_shard_mem_types_ {
887953
         gf_shard_mt_inode_ctx_t,
887953
         gf_shard_mt_iovec,
887953
         gf_shard_mt_int64_t,
887953
+        gf_shard_mt_uint64_t,
887953
         gf_shard_mt_end
887953
 };
887953
 #endif
887953
diff --git a/xlators/features/shard/src/shard-messages.h b/xlators/features/shard/src/shard-messages.h
887953
index 8e61630..0267f8a 100644
887953
--- a/xlators/features/shard/src/shard-messages.h
887953
+++ b/xlators/features/shard/src/shard-messages.h
887953
@@ -40,7 +40,7 @@
887953
  */
887953
 
887953
 #define GLFS_COMP_BASE_SHARD      GLFS_MSGID_COMP_SHARD
887953
-#define GLFS_NUM_MESSAGES         19
887953
+#define GLFS_NUM_MESSAGES         20
887953
 #define GLFS_MSGID_END          (GLFS_COMP_BASE_SHARD + GLFS_NUM_MESSAGES + 1)
887953
 
887953
 #define glfs_msg_start_x GLFS_COMP_BASE_SHARD, "Invalid: Start of messages"
887953
@@ -187,5 +187,12 @@
887953
 */
887953
 #define SHARD_MSG_MEMALLOC_FAILED                    (GLFS_COMP_BASE_SHARD + 19)
887953
 
887953
+/*!
887953
+ * @messageid 133020
887953
+ * @diagnosis
887953
+ * @recommendedaction
887953
+*/
887953
+#define SHARD_MSG_FOP_FAILED                         (GLFS_COMP_BASE_SHARD + 20)
887953
+
887953
 #define glfs_msg_end_x GLFS_MSGID_END, "Invalid: End of messages"
887953
 #endif /* !_SHARD_MESSAGES_H_ */
887953
diff --git a/xlators/features/shard/src/shard.c b/xlators/features/shard/src/shard.c
887953
index 268ba20..492341c 100644
887953
--- a/xlators/features/shard/src/shard.c
887953
+++ b/xlators/features/shard/src/shard.c
887953
@@ -117,9 +117,6 @@ __shard_inode_ctx_set (inode_t *inode, xlator_t *this, struct iatt *stbuf,
887953
         if (valid & SHARD_MASK_BLOCK_SIZE)
887953
                 ctx->block_size = block_size;
887953
 
887953
-        if (!stbuf)
887953
-                return 0;
887953
-
887953
         if (valid & SHARD_MASK_PROT)
887953
                 ctx->stat.ia_prot = stbuf->ia_prot;
887953
 
887953
@@ -179,7 +176,35 @@ shard_inode_ctx_set (inode_t *inode, xlator_t *this, struct iatt *stbuf,
887953
 }
887953
 
887953
 int
887953
-__shard_inode_ctx_set_refreshed_flag (inode_t *inode, xlator_t *this)
887953
+__shard_inode_ctx_set_refresh_flag (inode_t *inode, xlator_t *this)
887953
+{
887953
+        int                 ret = -1;
887953
+        shard_inode_ctx_t  *ctx = NULL;
887953
+
887953
+        ret = __shard_inode_ctx_get (inode, this, &ctx);
887953
+        if (ret)
887953
+                return ret;
887953
+
887953
+        ctx->refresh = _gf_true;
887953
+
887953
+        return 0;
887953
+}
887953
+int
887953
+shard_inode_ctx_set_refresh_flag (inode_t *inode, xlator_t *this)
887953
+{
887953
+        int ret = -1;
887953
+
887953
+        LOCK (&inode->lock);
887953
+        {
887953
+                ret = __shard_inode_ctx_set_refresh_flag (inode, this);
887953
+        }
887953
+        UNLOCK (&inode->lock);
887953
+
887953
+        return ret;
887953
+}
887953
+
887953
+int
887953
+__shard_inode_ctx_mark_dir_refreshed (inode_t *inode, xlator_t *this)
887953
 {
887953
         int                 ret = -1;
887953
         shard_inode_ctx_t  *ctx = NULL;
887953
@@ -193,13 +218,13 @@ __shard_inode_ctx_set_refreshed_flag (inode_t *inode, xlator_t *this)
887953
 }
887953
 
887953
 int
887953
-shard_inode_ctx_set_refreshed_flag (inode_t *inode, xlator_t *this)
887953
+shard_inode_ctx_mark_dir_refreshed (inode_t *inode, xlator_t *this)
887953
 {
887953
         int ret = -1;
887953
 
887953
         LOCK (&inode->lock);
887953
         {
887953
-                ret = __shard_inode_ctx_set_refreshed_flag (inode, this);
887953
+                ret = __shard_inode_ctx_mark_dir_refreshed (inode, this);
887953
         }
887953
         UNLOCK (&inode->lock);
887953
 
887953
@@ -478,9 +503,15 @@ shard_local_wipe (shard_local_t *local)
887953
         syncbarrier_destroy (&local->barrier);
887953
         loc_wipe (&local->loc);
887953
         loc_wipe (&local->dot_shard_loc);
887953
+        loc_wipe (&local->dot_shard_rm_loc);
887953
         loc_wipe (&local->loc2);
887953
         loc_wipe (&local->tmp_loc);
887953
+        loc_wipe (&local->int_inodelk.loc);
887953
+        loc_wipe (&local->int_entrylk.loc);
887953
+        loc_wipe (&local->newloc);
887953
 
887953
+        if (local->int_entrylk.basename)
887953
+                GF_FREE (local->int_entrylk.basename);
887953
         if (local->fd)
887953
                 fd_unref (local->fd);
887953
 
887953
@@ -504,6 +535,10 @@ shard_local_wipe (shard_local_t *local)
887953
                 iobref_unref (local->iobref);
887953
         if (local->list_inited)
887953
                 gf_dirent_free (&local->entries_head);
887953
+        if (local->inodelk_frame)
887953
+                SHARD_STACK_DESTROY (local->inodelk_frame);
887953
+        if (local->entrylk_frame)
887953
+                SHARD_STACK_DESTROY (local->entrylk_frame);
887953
 }
887953
 
887953
 int
887953
@@ -554,7 +589,10 @@ shard_internal_dir_string (shard_internal_dir_type_t type)
887953
 
887953
         switch (type) {
887953
         case SHARD_INTERNAL_DIR_DOT_SHARD:
887953
-                str = ".shard";
887953
+                str = GF_SHARD_DIR;
887953
+                break;
887953
+        case SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME:
887953
+                str = GF_SHARD_REMOVE_ME_DIR;
887953
                 break;
887953
         default:
887953
                 break;
887953
@@ -566,10 +604,13 @@ static int
887953
 shard_init_internal_dir_loc (xlator_t *this, shard_local_t *local,
887953
                              shard_internal_dir_type_t type)
887953
 {
887953
-        int    ret              = -1;
887953
-        char  *bname            = NULL;
887953
-        loc_t *internal_dir_loc = NULL;
887953
+        int           ret              = -1;
887953
+        char         *bname            = NULL;
887953
+        inode_t      *parent           = NULL;
887953
+        loc_t        *internal_dir_loc = NULL;
887953
+        shard_priv_t *priv             = NULL;
887953
 
887953
+        priv = this->private;
887953
         if (!local)
887953
                 return -1;
887953
 
887953
@@ -577,13 +618,19 @@ shard_init_internal_dir_loc (xlator_t *this, shard_local_t *local,
887953
         case SHARD_INTERNAL_DIR_DOT_SHARD:
887953
                 internal_dir_loc = &local->dot_shard_loc;
887953
                 bname = GF_SHARD_DIR;
887953
+                parent = inode_ref (this->itable->root);
887953
+                break;
887953
+        case SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME:
887953
+                internal_dir_loc = &local->dot_shard_rm_loc;
887953
+                bname = GF_SHARD_REMOVE_ME_DIR;
887953
+                parent = inode_ref (priv->dot_shard_inode);
887953
                 break;
887953
         default:
887953
                 break;
887953
         }
887953
 
887953
         internal_dir_loc->inode = inode_new (this->itable);
887953
-        internal_dir_loc->parent = inode_ref (this->itable->root);
887953
+        internal_dir_loc->parent = parent;
887953
         ret = inode_path (internal_dir_loc->parent, bname,
887953
                           (char **)&internal_dir_loc->path);
887953
         if (ret < 0 || !(internal_dir_loc->inode)) {
887953
@@ -706,11 +753,48 @@ __shard_update_shards_inode_list (inode_t *linked_inode, xlator_t *this,
887953
 }
887953
 
887953
 int
887953
-shard_common_inode_write_failure_unwind (glusterfs_fop_t fop,
887953
-                                         call_frame_t *frame, int32_t op_ret,
887953
-                                         int32_t op_errno)
887953
+shard_common_failure_unwind (glusterfs_fop_t fop, call_frame_t *frame,
887953
+                             int32_t op_ret, int32_t op_errno)
887953
 {
887953
         switch (fop) {
887953
+        case GF_FOP_LOOKUP:
887953
+                SHARD_STACK_UNWIND (lookup, frame, op_ret, op_errno, NULL, NULL,
887953
+                                    NULL, NULL);
887953
+                break;
887953
+        case GF_FOP_STAT:
887953
+                SHARD_STACK_UNWIND (stat, frame, op_ret, op_errno, NULL, NULL);
887953
+                break;
887953
+        case GF_FOP_FSTAT:
887953
+                SHARD_STACK_UNWIND (fstat, frame, op_ret, op_errno, NULL, NULL);
887953
+                break;
887953
+        case GF_FOP_TRUNCATE:
887953
+                SHARD_STACK_UNWIND (truncate, frame, op_ret, op_errno, NULL,
887953
+                                    NULL, NULL);
887953
+                break;
887953
+        case GF_FOP_FTRUNCATE:
887953
+                SHARD_STACK_UNWIND (ftruncate, frame, op_ret, op_errno, NULL,
887953
+                                    NULL, NULL);
887953
+                break;
887953
+        case GF_FOP_MKNOD:
887953
+                SHARD_STACK_UNWIND (mknod, frame, op_ret, op_errno, NULL,
887953
+                                    NULL, NULL, NULL, NULL);
887953
+                break;
887953
+        case GF_FOP_LINK:
887953
+                SHARD_STACK_UNWIND (link, frame, op_ret, op_errno, NULL,
887953
+                                    NULL, NULL, NULL, NULL);
887953
+                break;
887953
+        case GF_FOP_CREATE:
887953
+                SHARD_STACK_UNWIND (create, frame, op_ret, op_errno, NULL,
887953
+                                    NULL, NULL, NULL, NULL, NULL);
887953
+                break;
887953
+        case GF_FOP_UNLINK:
887953
+                SHARD_STACK_UNWIND (unlink, frame, op_ret, op_errno, NULL, NULL,
887953
+                                    NULL);
887953
+                break;
887953
+        case GF_FOP_RENAME:
887953
+                SHARD_STACK_UNWIND (rename, frame, op_ret, op_errno, NULL, NULL,
887953
+                                    NULL, NULL, NULL, NULL);
887953
+                break;
887953
         case GF_FOP_WRITE:
887953
                 SHARD_STACK_UNWIND (writev, frame, op_ret, op_errno,
887953
                                     NULL, NULL, NULL);
887953
@@ -727,6 +811,45 @@ shard_common_inode_write_failure_unwind (glusterfs_fop_t fop,
887953
                 SHARD_STACK_UNWIND (discard, frame, op_ret, op_errno,
887953
                                     NULL, NULL, NULL);
887953
                 break;
887953
+        case GF_FOP_READ:
887953
+                SHARD_STACK_UNWIND (readv, frame, op_ret, op_errno, NULL, -1,
887953
+                                    NULL, NULL, NULL);
887953
+                break;
887953
+        case GF_FOP_FSYNC:
887953
+                SHARD_STACK_UNWIND (fsync, frame, op_ret, op_errno, NULL, NULL,
887953
+                                    NULL);
887953
+                break;
887953
+        case GF_FOP_REMOVEXATTR:
887953
+                SHARD_STACK_UNWIND (removexattr, frame, op_ret, op_errno, NULL);
887953
+                break;
887953
+        case GF_FOP_FREMOVEXATTR:
887953
+                SHARD_STACK_UNWIND (fremovexattr, frame, op_ret, op_errno, NULL);
887953
+                break;
887953
+        case GF_FOP_FGETXATTR:
887953
+                SHARD_STACK_UNWIND (fgetxattr, frame, op_ret, op_errno, NULL,
887953
+                                    NULL);
887953
+                break;
887953
+        case GF_FOP_GETXATTR:
887953
+                SHARD_STACK_UNWIND (getxattr, frame, op_ret, op_errno, NULL,
887953
+                                    NULL);
887953
+                break;
887953
+        case GF_FOP_FSETXATTR:
887953
+                SHARD_STACK_UNWIND (fsetxattr, frame, op_ret, op_errno, NULL);
887953
+                break;
887953
+        case GF_FOP_SETXATTR:
887953
+                SHARD_STACK_UNWIND (setxattr, frame, op_ret, op_errno, NULL);
887953
+                break;
887953
+        case GF_FOP_SETATTR:
887953
+                SHARD_STACK_UNWIND (setattr, frame, op_ret, op_errno, NULL,
887953
+                                    NULL, NULL);
887953
+                break;
887953
+        case GF_FOP_FSETATTR:
887953
+                SHARD_STACK_UNWIND (fsetattr, frame, op_ret, op_errno, NULL,
887953
+                                    NULL, NULL);
887953
+                break;
887953
+        case GF_FOP_SEEK:
887953
+                SHARD_STACK_UNWIND (seek, frame, op_ret, op_errno, 0, NULL);
887953
+                break;
887953
         default:
887953
                 gf_msg (THIS->name, GF_LOG_WARNING, 0, SHARD_MSG_INVALID_FOP,
887953
                         "Invalid fop id = %d", fop);
887953
@@ -866,7 +989,7 @@ shard_common_resolve_shards (call_frame_t *frame, xlator_t *this,
887953
         shard_idx_iter = local->first_block;
887953
         res_inode = local->resolver_base_inode;
887953
 
887953
-        if (local->op_ret < 0)
887953
+        if ((local->op_ret < 0) || (local->resolve_not))
887953
                 goto out;
887953
 
887953
         while (shard_idx_iter <= local->last_block) {
887953
@@ -1063,19 +1186,26 @@ shard_link_internal_dir_inode (shard_local_t *local, inode_t *inode,
887953
         shard_priv_t  *priv         = NULL;
887953
         char          *bname        = NULL;
887953
         inode_t       **priv_inode  = NULL;
887953
+        inode_t       *parent       = NULL;
887953
 
887953
         priv = THIS->private;
887953
 
887953
         switch (type) {
887953
         case SHARD_INTERNAL_DIR_DOT_SHARD:
887953
-                bname = ".shard";
887953
+                bname = GF_SHARD_DIR;
887953
                 priv_inode = &priv->dot_shard_inode;
887953
+                parent = inode->table->root;
887953
+                break;
887953
+        case SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME:
887953
+                bname = GF_SHARD_REMOVE_ME_DIR;
887953
+                priv_inode = &priv->dot_shard_rm_inode;
887953
+                parent = priv->dot_shard_inode;
887953
                 break;
887953
         default:
887953
                 break;
887953
         }
887953
 
887953
-        linked_inode = inode_link (inode, inode->table->root, bname, buf);
887953
+        linked_inode = inode_link (inode, parent, bname, buf);
887953
         inode_lookup (linked_inode);
887953
         *priv_inode = linked_inode;
887953
         return linked_inode;
887953
@@ -1105,7 +1235,7 @@ shard_refresh_internal_dir_cbk (call_frame_t *frame, void *cookie,
887953
          * shard_link_internal_dir_inode().
887953
          */
887953
         linked_inode = shard_link_internal_dir_inode (local, inode, buf, type);
887953
-        shard_inode_ctx_set_refreshed_flag (linked_inode, this);
887953
+        shard_inode_ctx_mark_dir_refreshed (linked_inode, this);
887953
 out:
887953
         shard_common_resolve_shards (frame, this, local->post_res_handler);
887953
         return 0;
887953
@@ -1128,6 +1258,9 @@ shard_refresh_internal_dir (call_frame_t *frame, xlator_t *this,
887953
         case SHARD_INTERNAL_DIR_DOT_SHARD:
887953
                 gf_uuid_copy (gfid, priv->dot_shard_gfid);
887953
                 break;
887953
+        case SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME:
887953
+                gf_uuid_copy (gfid, priv->dot_shard_rm_gfid);
887953
+                break;
887953
         default:
887953
                 break;
887953
         }
887953
@@ -1189,7 +1322,7 @@ shard_lookup_internal_dir_cbk (call_frame_t *frame, void *cookie, xlator_t *this
887953
         if (link_inode != inode) {
887953
                 shard_refresh_internal_dir (frame, this, type);
887953
         } else {
887953
-                shard_inode_ctx_set_refreshed_flag (link_inode, this);
887953
+                shard_inode_ctx_mark_dir_refreshed (link_inode, this);
887953
                 shard_common_resolve_shards (frame, this,
887953
                                              local->post_res_handler);
887953
         }
887953
@@ -1233,6 +1366,10 @@ shard_lookup_internal_dir (call_frame_t *frame, xlator_t *this,
887953
                 gf_uuid_copy (*gfid, priv->dot_shard_gfid);
887953
                 loc = &local->dot_shard_loc;
887953
                 break;
887953
+        case SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME:
887953
+                gf_uuid_copy (*gfid, priv->dot_shard_rm_gfid);
887953
+                loc = &local->dot_shard_rm_loc;
887953
+                break;
887953
         default:
887953
                 break;
887953
         }
887953
@@ -1383,13 +1520,9 @@ shard_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc,
887953
 
887953
         STACK_WIND (frame, shard_lookup_cbk, FIRST_CHILD (this),
887953
                     FIRST_CHILD (this)->fops->lookup, loc, local->xattr_req);
887953
-
887953
         return 0;
887953
-
887953
-
887953
 err:
887953
-        SHARD_STACK_UNWIND (lookup, frame, -1, op_errno, NULL, NULL,
887953
-                             NULL, NULL);
887953
+        shard_common_failure_unwind (GF_FOP_LOOKUP, frame, -1, op_errno);
887953
         return 0;
887953
 
887953
 }
887953
@@ -1610,11 +1743,9 @@ shard_stat (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
887953
 
887953
         STACK_WIND (frame, shard_common_stat_cbk, FIRST_CHILD(this),
887953
                     FIRST_CHILD(this)->fops->stat, loc, local->xattr_req);
887953
-
887953
         return 0;
887953
-
887953
 err:
887953
-        SHARD_STACK_UNWIND (stat, frame, -1, ENOMEM, NULL, NULL);
887953
+        shard_common_failure_unwind (GF_FOP_STAT, frame, -1, ENOMEM);
887953
         return 0;
887953
 }
887953
 
887953
@@ -1668,9 +1799,8 @@ shard_fstat (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
887953
         STACK_WIND (frame, shard_common_stat_cbk, FIRST_CHILD(this),
887953
                     FIRST_CHILD(this)->fops->fstat, fd, local->xattr_req);
887953
         return 0;
887953
-
887953
 err:
887953
-        SHARD_STACK_UNWIND (fstat, frame, -1, ENOMEM, NULL, NULL);
887953
+        shard_common_failure_unwind (GF_FOP_FSTAT, frame, -1, ENOMEM);
887953
         return 0;
887953
 }
887953
 
887953
@@ -1728,14 +1858,9 @@ shard_truncate_last_shard_cbk (call_frame_t *frame, void *cookie,
887953
         shard_update_file_size (frame, this, NULL, &local->loc,
887953
                                 shard_post_update_size_truncate_handler);
887953
         return 0;
887953
-
887953
 err:
887953
-        if (local->fop == GF_FOP_TRUNCATE)
887953
-                SHARD_STACK_UNWIND (truncate, frame, local->op_ret,
887953
-                                    local->op_errno, NULL, NULL, NULL);
887953
-        else
887953
-                SHARD_STACK_UNWIND (ftruncate, frame, local->op_ret,
887953
-                                    local->op_errno, NULL, NULL, NULL);
887953
+        shard_common_failure_unwind (local->fop, frame, local->op_ret,
887953
+                                     local->op_errno);
887953
         return 0;
887953
 }
887953
 
887953
@@ -1946,12 +2071,8 @@ shard_post_lookup_shards_truncate_handler (call_frame_t *frame, xlator_t *this)
887953
         local = frame->local;
887953
 
887953
         if (local->op_ret < 0) {
887953
-                if (local->fop == GF_FOP_TRUNCATE)
887953
-                        SHARD_STACK_UNWIND (truncate, frame, local->op_ret,
887953
-                                            local->op_errno, NULL, NULL, NULL);
887953
-                else
887953
-                        SHARD_STACK_UNWIND (ftruncate, frame, local->op_ret,
887953
-                                            local->op_errno, NULL, NULL, NULL);
887953
+                shard_common_failure_unwind (local->fop, frame, local->op_ret,
887953
+                                             local->op_errno);
887953
                 return 0;
887953
         }
887953
 
887953
@@ -2232,16 +2353,9 @@ shard_post_resolve_truncate_handler (call_frame_t *frame, xlator_t *this)
887953
                                        shard_post_update_size_truncate_handler);
887953
                         return 0;
887953
                 } else {
887953
-                        if (local->fop == GF_FOP_TRUNCATE)
887953
-                                SHARD_STACK_UNWIND (truncate, frame,
887953
-                                                    local->op_ret,
887953
-                                                    local->op_errno, NULL, NULL,
887953
-                                                    NULL);
887953
-                        else
887953
-                                SHARD_STACK_UNWIND (ftruncate, frame,
887953
-                                                    local->op_ret,
887953
-                                                    local->op_errno, NULL, NULL,
887953
-                                                    NULL);
887953
+                        shard_common_failure_unwind (local->fop, frame,
887953
+                                                     local->op_ret,
887953
+                                                     local->op_errno);
887953
                         return 0;
887953
                 }
887953
         }
887953
@@ -2329,14 +2443,8 @@ shard_truncate_begin (call_frame_t *frame, xlator_t *this)
887953
         return 0;
887953
 
887953
 err:
887953
-        if (local->fop == GF_FOP_TRUNCATE)
887953
-                SHARD_STACK_UNWIND (truncate, frame, -1, ENOMEM, NULL, NULL,
887953
-                                    NULL);
887953
-        else
887953
-                SHARD_STACK_UNWIND (ftruncate, frame, -1, ENOMEM, NULL, NULL,
887953
-                                    NULL);
887953
-
887953
-       return 0;
887953
+        shard_common_failure_unwind (local->fop, frame, -1, ENOMEM);
887953
+        return 0;
887953
 }
887953
 
887953
 int
887953
@@ -2348,13 +2456,8 @@ shard_post_lookup_truncate_handler (call_frame_t *frame, xlator_t *this)
887953
         local = frame->local;
887953
 
887953
         if (local->op_ret < 0) {
887953
-                if (local->fop == GF_FOP_TRUNCATE)
887953
-                        SHARD_STACK_UNWIND (truncate, frame, local->op_ret,
887953
-                                            local->op_errno, NULL, NULL, NULL);
887953
-                else
887953
-                        SHARD_STACK_UNWIND (ftruncate, frame, local->op_ret,
887953
-                                            local->op_errno, NULL, NULL, NULL);
887953
-
887953
+                shard_common_failure_unwind (local->fop, frame, local->op_ret,
887953
+                                             local->op_errno);
887953
                 return 0;
887953
         }
887953
 
887953
@@ -2457,7 +2560,7 @@ shard_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
887953
         return 0;
887953
 
887953
 err:
887953
-        SHARD_STACK_UNWIND (truncate, frame, -1, ENOMEM, NULL, NULL, NULL);
887953
+        shard_common_failure_unwind (GF_FOP_TRUNCATE, frame, -1, ENOMEM);
887953
         return 0;
887953
 }
887953
 
887953
@@ -2512,8 +2615,7 @@ shard_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
887953
                                 shard_post_lookup_truncate_handler);
887953
         return 0;
887953
 err:
887953
-
887953
-        SHARD_STACK_UNWIND (ftruncate, frame, -1, ENOMEM, NULL, NULL, NULL);
887953
+        shard_common_failure_unwind (GF_FOP_FTRUNCATE, frame, -1, ENOMEM);
887953
         return 0;
887953
 }
887953
 
887953
@@ -2531,7 +2633,7 @@ shard_mknod_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
887953
         if (op_ret == -1)
887953
                 goto unwind;
887953
 
887953
-        ret = shard_inode_ctx_set (inode, this, buf, ntoh64 (local->block_size),
887953
+        ret = shard_inode_ctx_set (inode, this, buf, local->block_size,
887953
                                    SHARD_ALL_MASK);
887953
         if (ret)
887953
                 gf_msg (this->name, GF_LOG_WARNING, 0,
887953
@@ -2549,25 +2651,27 @@ int
887953
 shard_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
887953
              dev_t rdev, mode_t umask, dict_t *xdata)
887953
 {
887953
+        shard_priv_t   *priv       = NULL;
887953
         shard_local_t  *local      = NULL;
887953
 
887953
+        priv = this->private;
887953
         local = mem_get0 (this->local_pool);
887953
         if (!local)
887953
                 goto err;
887953
 
887953
         frame->local = local;
887953
+        local->block_size = priv->block_size;
887953
         if (!__is_gsyncd_on_shard_dir (frame, loc)) {
887953
-                SHARD_INODE_CREATE_INIT (this, local, xdata, loc, err);
887953
+                SHARD_INODE_CREATE_INIT (this, local->block_size, xdata, loc, 0,
887953
+                                         0, err);
887953
         }
887953
 
887953
         STACK_WIND (frame, shard_mknod_cbk, FIRST_CHILD (this),
887953
                     FIRST_CHILD(this)->fops->mknod, loc, mode, rdev, umask,
887953
                     xdata);
887953
         return 0;
887953
-
887953
 err:
887953
-        SHARD_STACK_UNWIND (mknod, frame, -1, ENOMEM, NULL, NULL, NULL,
887953
-                            NULL, NULL);
887953
+        shard_common_failure_unwind (GF_FOP_MKNOD, frame, -1, ENOMEM);
887953
         return 0;
887953
 
887953
 }
887953
@@ -2594,8 +2698,7 @@ shard_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
887953
                              preparent, postparent, xdata);
887953
         return 0;
887953
 err:
887953
-        SHARD_STACK_UNWIND (link, frame, op_ret, op_errno, inode, NULL, NULL,
887953
-                            NULL, NULL);
887953
+        shard_common_failure_unwind (GF_FOP_LINK, frame, op_ret, op_errno);
887953
         return 0;
887953
 }
887953
 
887953
@@ -2660,10 +2763,8 @@ shard_link (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
887953
         shard_lookup_base_file (frame, this, &local->loc,
887953
                                 shard_post_lookup_link_handler);
887953
         return 0;
887953
-
887953
 err:
887953
-        SHARD_STACK_UNWIND (link, frame, -1, ENOMEM, NULL, NULL, NULL, NULL,
887953
-                            NULL);
887953
+        shard_common_failure_unwind (GF_FOP_LINK, frame, -1, ENOMEM);
887953
         return 0;
887953
 }
887953
 
887953
@@ -2678,13 +2779,8 @@ shard_post_lookup_shards_unlink_handler (call_frame_t *frame, xlator_t *this)
887953
         local = frame->local;
887953
 
887953
         if ((local->op_ret < 0) && (local->op_errno != ENOENT)) {
887953
-                if (local->fop == GF_FOP_UNLINK)
887953
-                        SHARD_STACK_UNWIND (unlink, frame, local->op_ret,
887953
-                                            local->op_errno, NULL, NULL, NULL);
887953
-                else
887953
-                        SHARD_STACK_UNWIND (rename, frame, local->op_ret,
887953
-                                            local->op_errno, NULL, NULL, NULL,
887953
-                                            NULL, NULL, NULL);
887953
+                shard_common_failure_unwind (local->fop, frame, local->op_ret,
887953
+                                             local->op_errno);
887953
                 return 0;
887953
         }
887953
         local->op_ret = 0;
887953
@@ -2724,13 +2820,9 @@ shard_post_resolve_unlink_handler (call_frame_t *frame, xlator_t *this)
887953
                                 shard_rename_cbk (frame, this);
887953
                         return 0;
887953
                 } else {
887953
-                        if (local->fop == GF_FOP_UNLINK)
887953
-                                SHARD_STACK_UNWIND (unlink, frame,
887953
-                                                    local->op_ret,
887953
-                                                    local->op_errno, NULL, NULL,
887953
-                                                    NULL);
887953
-                        else
887953
-                                shard_rename_cbk (frame, this);
887953
+                        shard_common_failure_unwind (local->fop, frame,
887953
+                                                     local->op_ret,
887953
+                                                     local->op_errno);
887953
                         return 0;
887953
                 }
887953
         }
887953
@@ -2745,103 +2837,6 @@ shard_post_resolve_unlink_handler (call_frame_t *frame, xlator_t *this)
887953
         return 0;
887953
 }
887953
 
887953
-int
887953
-shard_unlink_base_file_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
887953
-                            int32_t op_ret, int32_t op_errno,
887953
-                            struct iatt *preparent, struct iatt *postparent,
887953
-                            dict_t *xdata)
887953
-{
887953
-        int                  ret        = 0;
887953
-        uint32_t             link_count = 0;
887953
-        shard_local_t       *local      = NULL;
887953
-        shard_priv_t        *priv       = NULL;
887953
-
887953
-        local = frame->local;
887953
-        priv = this->private;
887953
-
887953
-        if (op_ret < 0) {
887953
-                SHARD_STACK_UNWIND (unlink, frame, op_ret, op_errno, NULL, NULL,
887953
-                                    NULL);
887953
-                return 0;
887953
-        }
887953
-
887953
-        /* Because link() does not create links for all but the
887953
-         * base shard, unlink() must delete these shards only when the
887953
-         * link count is 1. We can return safely now.
887953
-         */
887953
-        if ((xdata) && (!dict_get_uint32 (xdata, GET_LINK_COUNT, &link_count))
887953
-            && (link_count > 1))
887953
-                goto unwind;
887953
-
887953
-        local->first_block = get_lowest_block (0, local->block_size);
887953
-        local->last_block = get_highest_block (0, local->prebuf.ia_size,
887953
-                                               local->block_size);
887953
-        local->num_blocks = local->last_block - local->first_block + 1;
887953
-        local->resolver_base_inode = local->loc.inode;
887953
-
887953
-        /* num_blocks = 1 implies that the file has not crossed its
887953
-         * shard block size. So unlink boils down to unlinking just the
887953
-         * base file. We can safely return now.
887953
-         */
887953
-        if (local->num_blocks == 1)
887953
-                goto unwind;
887953
-
887953
-        local->inode_list = GF_CALLOC (local->num_blocks, sizeof (inode_t *),
887953
-                                       gf_shard_mt_inode_list);
887953
-        if (!local->inode_list)
887953
-                goto unwind;
887953
-
887953
-        /* Save the xdata and preparent and postparent iatts now. This will be
887953
-         * used at the time of unwinding the call to the parent xl.
887953
-         */
887953
-        local->preoldparent = *preparent;
887953
-        local->postoldparent = *postparent;
887953
-        if (xdata)
887953
-                local->xattr_rsp = dict_ref (xdata);
887953
-
887953
-        local->dot_shard_loc.inode = inode_find (this->itable,
887953
-                                                 priv->dot_shard_gfid);
887953
-        if (!local->dot_shard_loc.inode) {
887953
-                ret = shard_init_internal_dir_loc (this, local,
887953
-                                                   SHARD_INTERNAL_DIR_DOT_SHARD);
887953
-                if (ret)
887953
-                        goto unwind;
887953
-                shard_lookup_internal_dir (frame, this,
887953
-                                           shard_post_resolve_unlink_handler,
887953
-                                           SHARD_INTERNAL_DIR_DOT_SHARD);
887953
-        } else {
887953
-                local->post_res_handler = shard_post_resolve_unlink_handler;
887953
-                shard_refresh_internal_dir (frame, this,
887953
-                                            SHARD_INTERNAL_DIR_DOT_SHARD);
887953
-        }
887953
-
887953
-        return 0;
887953
-
887953
-unwind:
887953
-        SHARD_STACK_UNWIND (unlink, frame, op_ret, op_errno,  preparent,
887953
-                            postparent, xdata);
887953
-        return 0;
887953
-}
887953
-
887953
-int
887953
-shard_unlink_base_file (call_frame_t *frame, xlator_t *this)
887953
-{
887953
-        shard_local_t *local = NULL;
887953
-
887953
-        local = frame->local;
887953
-
887953
-        if (dict_set_uint32 (local->xattr_req, GET_LINK_COUNT, 0))
887953
-                gf_msg (this->name, GF_LOG_WARNING, 0,
887953
-                        SHARD_MSG_DICT_SET_FAILED, "Failed to set "
887953
-                        GET_LINK_COUNT" in dict");
887953
-
887953
-        /* To-Do: Request open-fd count on base file */
887953
-        STACK_WIND (frame, shard_unlink_base_file_cbk, FIRST_CHILD(this),
887953
-                    FIRST_CHILD(this)->fops->unlink, &local->loc, local->xflag,
887953
-                    local->xattr_req);
887953
-        return 0;
887953
-}
887953
-
887953
 void
887953
 shard_unlink_block_inode (shard_local_t *local, int shard_block_num)
887953
 {
887953
@@ -3062,160 +3057,754 @@ next:
887953
 }
887953
 
887953
 int
887953
-shard_post_lookup_unlink_handler (call_frame_t *frame, xlator_t *this)
887953
+shard_unlock_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
887953
+                          int32_t op_ret, int32_t op_errno, dict_t *xdata)
887953
 {
887953
-        shard_local_t *local = NULL;
887953
+        if (op_ret)
887953
+                gf_msg (this->name, GF_LOG_ERROR, op_errno, SHARD_MSG_FOP_FAILED,
887953
+                        "Unlock failed. Please check brick logs for "
887953
+                        "more details");
887953
+        SHARD_STACK_DESTROY (frame);
887953
+        return 0;
887953
+}
887953
 
887953
-        local = frame->local;
887953
+int
887953
+shard_unlock_inodelk (call_frame_t *frame, xlator_t *this)
887953
+{
887953
+        loc_t           *loc          = NULL;
887953
+        call_frame_t    *lk_frame     = NULL;
887953
+        shard_local_t   *local        = NULL;
887953
+        shard_local_t   *lk_local     = NULL;
887953
+        shard_inodelk_t *lock         = NULL;
887953
 
887953
-        if (local->op_ret < 0) {
887953
-                SHARD_STACK_UNWIND (unlink, frame, local->op_ret,
887953
-                                    local->op_errno, NULL, NULL, NULL);
887953
-                return 0;
887953
-        }
887953
+        local = frame->local;
887953
+        lk_frame = local->inodelk_frame;
887953
+        lk_local = lk_frame->local;
887953
+        local->inodelk_frame = NULL;
887953
+        loc = &local->int_inodelk.loc;
887953
+        lock = &lk_local->int_inodelk;
887953
+        lock->flock.l_type = F_UNLCK;
887953
 
887953
-        shard_unlink_base_file (frame, this);
887953
+        STACK_WIND (lk_frame, shard_unlock_inodelk_cbk, FIRST_CHILD(this),
887953
+                    FIRST_CHILD(this)->fops->inodelk, lock->domain, loc,
887953
+                    F_SETLK, &lock->flock, NULL);
887953
+        local->int_inodelk.acquired_lock = _gf_false;
887953
         return 0;
887953
 }
887953
 
887953
 int
887953
-shard_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
887953
-              dict_t *xdata)
887953
+shard_rename_src_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
887953
+                      int32_t op_ret, int32_t op_errno, struct iatt *buf,
887953
+                      struct iatt *preoldparent, struct iatt *postoldparent,
887953
+                      struct iatt *prenewparent, struct iatt *postnewparent,
887953
+                      dict_t *xdata);
887953
+int
887953
+shard_rename_src_base_file (call_frame_t *frame, xlator_t *this)
887953
 {
887953
-        int             ret        = -1;
887953
-        uint64_t        block_size = 0;
887953
-        shard_local_t  *local      = NULL;
887953
+        int             ret     = 0;
887953
+        loc_t          *dst_loc = NULL;
887953
+        loc_t           tmp_loc = {0,};
887953
+        shard_local_t  *local   = frame->local;
887953
 
887953
-        ret = shard_inode_ctx_get_block_size (loc->inode, this, &block_size);
887953
-        if ((ret) && (!IA_ISLNK(loc->inode->ia_type))) {
887953
-                gf_msg (this->name, GF_LOG_ERROR, 0,
887953
-                        SHARD_MSG_INODE_CTX_GET_FAILED, "Failed to get block "
887953
-                        "size from inode ctx of %s",
887953
-                        uuid_utoa (loc->inode->gfid));
887953
-                goto err;
887953
-        }
887953
+        if (local->dst_block_size) {
887953
+                tmp_loc.parent = inode_ref (local->loc2.parent);
887953
+                ret = inode_path (tmp_loc.parent, local->loc2.name,
887953
+                                  (char **)&tmp_loc.path);
887953
+                if (ret < 0) {
887953
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
887953
+                                SHARD_MSG_INODE_PATH_FAILED, "Inode path failed"
887953
+                                " on pargfid=%s bname=%s",
887953
+                                uuid_utoa (tmp_loc.parent->gfid),
887953
+                                local->loc2.name);
887953
+                        local->op_ret = -1;
887953
+                        local->op_errno = ENOMEM;
887953
+                        goto err;
887953
+                }
887953
 
887953
-        if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) {
887953
-                STACK_WIND (frame, default_unlink_cbk, FIRST_CHILD(this),
887953
-                            FIRST_CHILD(this)->fops->unlink, loc, xflag, xdata);
887953
-                return 0;
887953
+                tmp_loc.name = strrchr (tmp_loc.path, '/');
887953
+                if (tmp_loc.name)
887953
+                        tmp_loc.name++;
887953
+                dst_loc = &tmp_loc;
887953
+        } else {
887953
+                dst_loc = &local->loc2;
887953
         }
887953
 
887953
-        local = mem_get0 (this->local_pool);
887953
-        if (!local)
887953
-                goto err;
887953
-
887953
-        frame->local = local;
887953
-
887953
-        loc_copy (&local->loc, loc);
887953
-        local->xflag = xflag;
887953
-        local->xattr_req = (xdata) ? dict_ref (xdata) : dict_new ();
887953
-        local->block_size = block_size;
887953
-        local->resolver_base_inode = loc->inode;
887953
-        local->fop = GF_FOP_UNLINK;
887953
-        if (!this->itable)
887953
-                this->itable = (local->loc.inode)->table;
887953
-
887953
-        shard_lookup_base_file (frame, this, &local->loc,
887953
-                                shard_post_lookup_unlink_handler);
887953
+        /* To-Do: Request open-fd count on dst base file */
887953
+        STACK_WIND (frame, shard_rename_src_cbk, FIRST_CHILD(this),
887953
+                    FIRST_CHILD(this)->fops->rename, &local->loc, dst_loc,
887953
+                    local->xattr_req);
887953
+        loc_wipe (&tmp_loc);
887953
         return 0;
887953
 err:
887953
-        SHARD_STACK_UNWIND (unlink, frame, -1, ENOMEM, NULL, NULL, NULL);
887953
+        loc_wipe (&tmp_loc);
887953
+        shard_common_failure_unwind (local->fop, frame, local->op_ret,
887953
+                                     local->op_errno);
887953
         return 0;
887953
-
887953
 }
887953
 
887953
 int
887953
-shard_rename_cbk (call_frame_t *frame, xlator_t *this)
887953
+shard_unlink_base_file (call_frame_t *frame, xlator_t *this);
887953
+
887953
+int
887953
+shard_set_size_attrs_on_marker_file_cbk (call_frame_t *frame, void *cookie,
887953
+                                         xlator_t *this, int32_t op_ret,
887953
+                                         int32_t op_errno, dict_t *dict,
887953
+                                         dict_t *xdata)
887953
 {
887953
+        shard_priv_t  *priv  = NULL;
887953
         shard_local_t *local = NULL;
887953
 
887953
+        priv = this->private;
887953
         local = frame->local;
887953
+        if (op_ret < 0) {
887953
+                gf_msg (this->name, GF_LOG_ERROR, op_errno,
887953
+                        SHARD_MSG_FOP_FAILED, "Xattrop on marker file failed "
887953
+                        "while performing %s; entry gfid=%s",
887953
+                        gf_fop_string (local->fop), local->newloc.name);
887953
+                goto err;
887953
+        }
887953
 
887953
-        SHARD_STACK_UNWIND (rename, frame, local->op_ret, local->op_errno,
887953
-                            &local->prebuf, &local->preoldparent,
887953
-                            &local->postoldparent, &local->prenewparent,
887953
-                            &local->postnewparent, local->xattr_rsp);
887953
+        inode_unlink (local->newloc.inode, priv->dot_shard_rm_inode,
887953
+                      local->newloc.name);
887953
+
887953
+        if (local->fop == GF_FOP_UNLINK)
887953
+                shard_unlink_base_file (frame, this);
887953
+        else if (local->fop == GF_FOP_RENAME)
887953
+                shard_rename_src_base_file (frame, this);
887953
+        return 0;
887953
+err:
887953
+        shard_common_failure_unwind (local->fop, frame, op_ret, op_errno);
887953
         return 0;
887953
 }
887953
 
887953
 int
887953
-shard_rename_unlink_dst_shards_do (call_frame_t *frame, xlator_t *this)
887953
+shard_set_size_attrs_on_marker_file (call_frame_t *frame, xlator_t *this)
887953
 {
887953
-        int            ret        = -1;
887953
-        uint32_t       link_count = 0;
887953
-        shard_local_t *local      = NULL;
887953
-        shard_priv_t  *priv       = NULL;
887953
+        int                 op_errno  = ENOMEM;
887953
+        uint64_t            bs        = 0;
887953
+        dict_t             *xdata     = NULL;
887953
+        shard_local_t      *local     = NULL;
887953
 
887953
         local = frame->local;
887953
-        priv = this->private;
887953
-
887953
-        local->first_block = get_lowest_block (0, local->dst_block_size);
887953
-        local->last_block = get_highest_block (0, local->postbuf.ia_size,
887953
-                                               local->dst_block_size);
887953
-        local->num_blocks = local->last_block - local->first_block + 1;
887953
-        local->resolver_base_inode = local->loc2.inode;
887953
+        xdata = dict_new ();
887953
+        if (!xdata)
887953
+                goto err;
887953
 
887953
-        if ((local->xattr_rsp) &&
887953
-            (!dict_get_uint32 (local->xattr_rsp, GET_LINK_COUNT, &link_count))
887953
-            && (link_count > 1)) {
887953
-                shard_rename_cbk (frame, this);
887953
-                return 0;
887953
+        if (local->fop == GF_FOP_UNLINK)
887953
+                bs = local->block_size;
887953
+        else if (local->fop == GF_FOP_RENAME)
887953
+                bs = local->dst_block_size;
887953
+        SHARD_INODE_CREATE_INIT (this, bs, xdata, &local->newloc,
887953
+                                 local->prebuf.ia_size, 0, err);
887953
+        STACK_WIND (frame, shard_set_size_attrs_on_marker_file_cbk,
887953
+                    FIRST_CHILD(this), FIRST_CHILD(this)->fops->xattrop,
887953
+                    &local->newloc, GF_XATTROP_GET_AND_SET, xdata, NULL);
887953
+        dict_unref (xdata);
887953
+        return 0;
887953
+err:
887953
+        if (xdata)
887953
+                dict_unref (xdata);
887953
+        shard_common_failure_unwind (local->fop, frame, -1, op_errno);
887953
+        return 0;
887953
+}
887953
+
887953
+int
887953
+shard_lookup_marker_file_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
887953
+                              int32_t op_ret, int32_t op_errno, inode_t *inode,
887953
+                              struct iatt *buf, dict_t *xdata,
887953
+                              struct iatt *postparent)
887953
+{
887953
+        inode_t            *linked_inode = NULL;
887953
+        shard_priv_t       *priv         = NULL;
887953
+        shard_local_t      *local        = NULL;
887953
+
887953
+        local = frame->local;
887953
+        priv = this->private;
887953
+
887953
+        if (op_ret < 0) {
887953
+                gf_msg (this->name, GF_LOG_ERROR, op_errno,
887953
+                        SHARD_MSG_FOP_FAILED, "Lookup on marker file failed "
887953
+                        "while performing %s; entry gfid=%s",
887953
+                        gf_fop_string (local->fop), local->newloc.name);
887953
+                goto err;
887953
         }
887953
 
887953
-        if (local->num_blocks == 1) {
887953
-                shard_rename_cbk (frame, this);
887953
+        linked_inode = inode_link (inode, priv->dot_shard_rm_inode,
887953
+                                   local->newloc.name, buf);
887953
+        inode_unref (local->newloc.inode);
887953
+        local->newloc.inode = linked_inode;
887953
+        shard_set_size_attrs_on_marker_file (frame, this);
887953
+        return 0;
887953
+err:
887953
+        shard_common_failure_unwind (local->fop, frame, op_ret, op_errno);
887953
+        return 0;
887953
+}
887953
+
887953
+int
887953
+shard_lookup_marker_file (call_frame_t *frame, xlator_t *this)
887953
+{
887953
+        int                 op_errno  = ENOMEM;
887953
+        dict_t             *xattr_req = NULL;
887953
+        shard_local_t      *local     = NULL;
887953
+
887953
+        local = frame->local;
887953
+
887953
+        xattr_req = shard_create_gfid_dict (local->xattr_req);
887953
+        if (!xattr_req)
887953
+                goto err;
887953
+
887953
+        STACK_WIND (frame, shard_lookup_marker_file_cbk, FIRST_CHILD(this),
887953
+                    FIRST_CHILD(this)->fops->lookup, &local->newloc, xattr_req);
887953
+        dict_unref (xattr_req);
887953
+        return 0;
887953
+err:
887953
+        if (xattr_req)
887953
+                dict_unref (xattr_req);
887953
+        shard_common_failure_unwind (local->fop, frame, -1, op_errno);
887953
+        return 0;
887953
+}
887953
+
887953
+int
887953
+shard_create_marker_file_under_remove_me_cbk (call_frame_t *frame, void *cookie,
887953
+                                              xlator_t *this, int32_t op_ret,
887953
+                                              int32_t op_errno, inode_t *inode,
887953
+                                              struct iatt *buf,
887953
+                                              struct iatt *preparent,
887953
+                                              struct iatt *postparent,
887953
+                                              dict_t *xdata)
887953
+{
887953
+        inode_t       *linked_inode = NULL;
887953
+        shard_priv_t  *priv         = NULL;
887953
+        shard_local_t *local        = NULL;
887953
+
887953
+        local = frame->local;
887953
+        priv = this->private;
887953
+
887953
+        SHARD_UNSET_ROOT_FS_ID (frame, local);
887953
+        if (op_ret < 0) {
887953
+                if ((op_errno != EEXIST) && (op_errno != ENODATA)) {
887953
+                        local->op_ret = op_ret;
887953
+                        local->op_errno = op_errno;
887953
+                        gf_msg (this->name, GF_LOG_ERROR, op_errno,
887953
+                                SHARD_MSG_FOP_FAILED, "Marker file creation "
887953
+                                "failed while performing %s; entry gfid=%s",
887953
+                                gf_fop_string (local->fop), local->newloc.name);
887953
+                        goto err;
887953
+                } else {
887953
+                        shard_lookup_marker_file (frame, this);
887953
+                        return 0;
887953
+                }
887953
+        }
887953
+
887953
+        linked_inode = inode_link (inode, priv->dot_shard_rm_inode,
887953
+                                   local->newloc.name, buf);
887953
+        inode_unref (local->newloc.inode);
887953
+        local->newloc.inode = linked_inode;
887953
+
887953
+        if (local->fop == GF_FOP_UNLINK)
887953
+                shard_unlink_base_file (frame, this);
887953
+        else if (local->fop == GF_FOP_RENAME)
887953
+                shard_rename_src_base_file (frame, this);
887953
+        return 0;
887953
+err:
887953
+        shard_common_failure_unwind (local->fop, frame, -1, local->op_errno);
887953
+        return 0;
887953
+}
887953
+
887953
+int
887953
+shard_create_marker_file_under_remove_me (call_frame_t *frame, xlator_t *this,
887953
+                                          loc_t *loc)
887953
+{
887953
+        int            ret       = 0;
887953
+        int            op_errno  = ENOMEM;
887953
+        uint64_t       bs        = 0;
887953
+        char           g1[64]    = {0,};
887953
+        char           g2[64]    = {0,};
887953
+        dict_t        *xattr_req = NULL;
887953
+        shard_priv_t  *priv      = NULL;
887953
+        shard_local_t *local     = NULL;
887953
+
887953
+        priv = this->private;
887953
+        local = frame->local;
887953
+
887953
+        SHARD_SET_ROOT_FS_ID (frame, local);
887953
+
887953
+        xattr_req = shard_create_gfid_dict (local->xattr_req);
887953
+        if (!xattr_req)
887953
+                goto err;
887953
+
887953
+        local->newloc.inode = inode_new (this->itable);
887953
+        local->newloc.parent = inode_ref (priv->dot_shard_rm_inode);
887953
+        ret = inode_path (local->newloc.parent, uuid_utoa (loc->inode->gfid),
887953
+                          (char **)&local->newloc.path);
887953
+        if (ret < 0) {
887953
+                gf_msg (this->name, GF_LOG_ERROR, 0,
887953
+                        SHARD_MSG_INODE_PATH_FAILED, "Inode path failed on "
887953
+                        "pargfid=%s bname=%s",
887953
+                        uuid_utoa_r (priv->dot_shard_rm_gfid, g1),
887953
+                        uuid_utoa_r (loc->inode->gfid, g2));
887953
+                goto err;
887953
+        }
887953
+        local->newloc.name = strrchr (local->newloc.path, '/');
887953
+        if (local->newloc.name)
887953
+                local->newloc.name++;
887953
+
887953
+        if (local->fop ==  GF_FOP_UNLINK)
887953
+                bs = local->block_size;
887953
+        else if (local->fop == GF_FOP_RENAME)
887953
+                bs = local->dst_block_size;
887953
+
887953
+        SHARD_INODE_CREATE_INIT (this, bs, xattr_req, &local->newloc,
887953
+                                 local->prebuf.ia_size, 0, err);
887953
+
887953
+        STACK_WIND (frame, shard_create_marker_file_under_remove_me_cbk,
887953
+                    FIRST_CHILD(this), FIRST_CHILD(this)->fops->mknod,
887953
+                    &local->newloc, 0, 0, 0644, xattr_req);
887953
+        dict_unref (xattr_req);
887953
+        return 0;
887953
+
887953
+err:
887953
+        if (xattr_req)
887953
+                dict_unref (xattr_req);
887953
+        shard_create_marker_file_under_remove_me_cbk (frame, 0, this, -1,
887953
+                                                      op_errno, NULL, NULL,
887953
+                                                      NULL, NULL, NULL);
887953
+        return 0;
887953
+}
887953
+
887953
+int
887953
+shard_unlock_entrylk (call_frame_t *frame, xlator_t *this);
887953
+
887953
+int
887953
+shard_unlink_base_file_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
887953
+                            int32_t op_ret, int32_t op_errno,
887953
+                            struct iatt *preparent, struct iatt *postparent,
887953
+                            dict_t *xdata)
887953
+{
887953
+        int                  ret        = 0;
887953
+        shard_local_t       *local      = NULL;
887953
+
887953
+        local = frame->local;
887953
+
887953
+        if (op_ret < 0) {
887953
+                local->op_ret = op_ret;
887953
+                local->op_errno = op_errno;
887953
+        } else {
887953
+                local->preoldparent = *preparent;
887953
+                local->postoldparent = *postparent;
887953
+                if (xdata)
887953
+                        local->xattr_rsp = dict_ref (xdata);
887953
+        }
887953
+        if (local->entrylk_frame) {
887953
+                ret = shard_unlock_entrylk (frame, this);
887953
+                if (ret < 0) {
887953
+                        local->op_ret = -1;
887953
+                        local->op_errno = -ret;
887953
+                }
887953
+        }
887953
+
887953
+        ret = shard_unlock_inodelk (frame, this);
887953
+        if (ret < 0) {
887953
+                local->op_ret = -1;
887953
+                local->op_errno = -ret;
887953
+        }
887953
+        shard_unlink_cbk (frame, this);
887953
+        return 0;
887953
+}
887953
+
887953
+int
887953
+shard_unlink_base_file (call_frame_t *frame, xlator_t *this)
887953
+{
887953
+        shard_local_t *local = frame->local;
887953
+
887953
+        /* To-Do: Request open-fd count on base file */
887953
+        STACK_WIND (frame, shard_unlink_base_file_cbk, FIRST_CHILD(this),
887953
+                    FIRST_CHILD(this)->fops->unlink, &local->loc, local->xflag,
887953
+                    local->xattr_req);
887953
+        return 0;
887953
+}
887953
+
887953
+int
887953
+shard_unlock_entrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
887953
+                          int32_t op_ret, int32_t op_errno, dict_t *xdata)
887953
+{
887953
+        if (op_ret)
887953
+                gf_msg (this->name, GF_LOG_ERROR, op_errno, SHARD_MSG_FOP_FAILED,
887953
+                        "Unlock failed. Please check brick logs for "
887953
+                        "more details");
887953
+        SHARD_STACK_DESTROY (frame);
887953
+        return 0;
887953
+}
887953
+
887953
+int
887953
+shard_unlock_entrylk (call_frame_t *frame, xlator_t *this)
887953
+{
887953
+        loc_t           *loc          = NULL;
887953
+        call_frame_t    *lk_frame     = NULL;
887953
+        shard_local_t   *local        = NULL;
887953
+        shard_local_t   *lk_local     = NULL;
887953
+        shard_entrylk_t *lock         = NULL;
887953
+
887953
+        local = frame->local;
887953
+        lk_frame = local->entrylk_frame;
887953
+        lk_local = lk_frame->local;
887953
+        local->entrylk_frame = NULL;
887953
+        lock = &lk_local->int_entrylk;
887953
+        loc = &lock->loc;
887953
+
887953
+        STACK_WIND (lk_frame, shard_unlock_entrylk_cbk, FIRST_CHILD(this),
887953
+                    FIRST_CHILD(this)->fops->entrylk, this->name, loc,
887953
+                    lk_local->int_entrylk.basename, ENTRYLK_UNLOCK,
887953
+                    ENTRYLK_WRLCK, NULL);
887953
+        local->int_entrylk.acquired_lock = _gf_false;
887953
+        return 0;
887953
+}
887953
+
887953
+int
887953
+shard_post_entrylk_fop_handler (call_frame_t *frame, xlator_t *this)
887953
+{
887953
+        shard_local_t *local = NULL;
887953
+
887953
+        local = frame->local;
887953
+
887953
+        switch (local->fop) {
887953
+        case GF_FOP_UNLINK:
887953
+        case GF_FOP_RENAME:
887953
+                shard_create_marker_file_under_remove_me (frame, this,
887953
+                                                          &local->int_inodelk.loc);
887953
+                break;
887953
+        default:
887953
+                gf_msg (this->name, GF_LOG_WARNING, 0, SHARD_MSG_INVALID_FOP,
887953
+                        "post-entrylk handler not defined. This case should not"
887953
+                        " be hit");
887953
+                break;
887953
+        }
887953
+        return 0;
887953
+}
887953
+
887953
+int
887953
+shard_acquire_entrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
887953
+                           int32_t op_ret, int32_t op_errno, dict_t *xdata)
887953
+{
887953
+        call_frame_t  *main_frame      = NULL;
887953
+        shard_local_t *local           = NULL;
887953
+        shard_local_t *main_local      = NULL;
887953
+
887953
+        local = frame->local;
887953
+        main_frame = local->main_frame;
887953
+        main_local = main_frame->local;
887953
+
887953
+        if (local->op_ret < 0) {
887953
+                shard_common_failure_unwind (main_local->fop, main_frame,
887953
+                                             op_ret, op_errno);
887953
                 return 0;
887953
         }
887953
+        main_local->int_entrylk.acquired_lock = _gf_true;
887953
+        shard_post_entrylk_fop_handler (main_frame, this);
887953
+        return 0;
887953
+}
887953
 
887953
-        local->inode_list = GF_CALLOC (local->num_blocks, sizeof (inode_t *),
887953
-                                       gf_shard_mt_inode_list);
887953
-        if (!local->inode_list)
887953
-                goto out;
887953
+int
887953
+shard_acquire_entrylk (call_frame_t *frame, xlator_t *this, inode_t *inode,
887953
+                       uuid_t gfid)
887953
+{
887953
+        char              gfid_str[GF_UUID_BUF_SIZE] = {0,};
887953
+        shard_local_t    *local                      = NULL;
887953
+        shard_local_t    *entrylk_local              = NULL;
887953
+        shard_entrylk_t  *int_entrylk                = NULL;
887953
+        call_frame_t     *entrylk_frame              = NULL;
887953
 
887953
-        local->dot_shard_loc.inode = inode_find (this->itable,
887953
-                                                 priv->dot_shard_gfid);
887953
-        if (!local->dot_shard_loc.inode) {
887953
-                ret = shard_init_internal_dir_loc (this, local,
887953
-                                                   SHARD_INTERNAL_DIR_DOT_SHARD);
887953
-                if (ret)
887953
-                        goto out;
887953
-                shard_lookup_internal_dir (frame, this,
887953
-                                           shard_post_resolve_unlink_handler,
887953
-                                           SHARD_INTERNAL_DIR_DOT_SHARD);
887953
+        entrylk_frame = create_frame (this, this->ctx->pool);
887953
+        if (!entrylk_frame) {
887953
+                gf_msg (this->name, GF_LOG_WARNING, ENOMEM,
887953
+                        SHARD_MSG_MEMALLOC_FAILED, "Failed to create new frame "
887953
+                        "to lock marker file");
887953
+                goto err;
887953
+        }
887953
+
887953
+        entrylk_local = mem_get0 (this->local_pool);
887953
+        if (!entrylk_local) {
887953
+                STACK_DESTROY (entrylk_frame->root);
887953
+                goto err;
887953
+        }
887953
+
887953
+        local = frame->local;
887953
+        entrylk_frame->local = entrylk_local;
887953
+        entrylk_local->main_frame = frame;
887953
+        int_entrylk = &entrylk_local->int_entrylk;
887953
+
887953
+        int_entrylk->loc.inode = inode_ref (inode);
887953
+        set_lk_owner_from_ptr (&entrylk_frame->root->lk_owner,
887953
+                               entrylk_frame->root);
887953
+        local->entrylk_frame = entrylk_frame;
887953
+        gf_uuid_unparse (gfid, gfid_str);
887953
+        int_entrylk->basename = gf_strdup (gfid_str);
887953
+
887953
+        STACK_WIND (entrylk_frame, shard_acquire_entrylk_cbk, FIRST_CHILD(this),
887953
+                    FIRST_CHILD(this)->fops->entrylk, this->name,
887953
+                    &int_entrylk->loc, int_entrylk->basename, ENTRYLK_LOCK,
887953
+                    ENTRYLK_WRLCK, NULL);
887953
+        return 0;
887953
+err:
887953
+        shard_common_failure_unwind (local->fop, frame, -1, ENOMEM);
887953
+        return 0;
887953
+}
887953
+
887953
+int
887953
+shard_post_lookup_base_shard_rm_handler (call_frame_t *frame, xlator_t *this)
887953
+{
887953
+        shard_local_t *local = NULL;
887953
+        shard_priv_t  *priv  = NULL;
887953
+
887953
+        priv = this->private;
887953
+        local = frame->local;
887953
+
887953
+        if (local->op_ret < 0) {
887953
+                shard_common_failure_unwind (local->fop, frame, -1,
887953
+                                             local->op_errno);
887953
+                return 0;
887953
+        }
887953
+
887953
+        if (local->prebuf.ia_nlink > 1) {
887953
+                gf_msg_debug (this->name, 0, "link count on %s > 1:%d, "
887953
+                              "performing rename()/unlink()",
887953
+                              local->int_inodelk.loc.path, local->prebuf.ia_nlink);
887953
+                if (local->fop == GF_FOP_RENAME)
887953
+                        shard_rename_src_base_file (frame, this);
887953
+                else if (local->fop == GF_FOP_UNLINK)
887953
+                        shard_unlink_base_file (frame, this);
887953
         } else {
887953
-                local->post_res_handler = shard_post_resolve_unlink_handler;
887953
-                shard_refresh_internal_dir (frame, this,
887953
-                                            SHARD_INTERNAL_DIR_DOT_SHARD);
887953
+                gf_msg_debug (this->name, 0, "link count on %s = 1, creating "
887953
+                              "file under .remove_me", local->int_inodelk.loc.path);
887953
+                shard_acquire_entrylk (frame, this, priv->dot_shard_rm_inode,
887953
+                                       local->prebuf.ia_gfid);
887953
         }
887953
+        return 0;
887953
+}
887953
 
887953
+int
887953
+shard_post_inodelk_fop_handler (call_frame_t *frame, xlator_t *this)
887953
+{
887953
+        shard_local_t *local = NULL;
887953
+
887953
+        local = frame->local;
887953
+
887953
+        switch (local->fop) {
887953
+        case GF_FOP_UNLINK:
887953
+        case GF_FOP_RENAME:
887953
+                shard_lookup_base_file (frame, this, &local->int_inodelk.loc,
887953
+                                        shard_post_lookup_base_shard_rm_handler);
887953
+                break;
887953
+        default:
887953
+                gf_msg (this->name, GF_LOG_WARNING, 0, SHARD_MSG_INVALID_FOP,
887953
+                        "post-inodelk handler not defined. This case should not"
887953
+                        " be hit");
887953
+                break;
887953
+        }
887953
         return 0;
887953
+}
887953
 
887953
-out:
887953
-        SHARD_STACK_UNWIND (rename, frame, -1, ENOMEM, NULL, NULL, NULL, NULL,
887953
-                            NULL, NULL);
887953
+int
887953
+shard_acquire_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
887953
+                           int32_t op_ret, int32_t op_errno, dict_t *xdata)
887953
+{
887953
+        call_frame_t  *main_frame      = NULL;
887953
+        shard_local_t *local           = NULL;
887953
+        shard_local_t *main_local      = NULL;
887953
+
887953
+        local = frame->local;
887953
+        main_frame = local->main_frame;
887953
+        main_local = main_frame->local;
887953
+
887953
+        if (local->op_ret < 0) {
887953
+                shard_common_failure_unwind (main_local->fop, main_frame,
887953
+                                             op_ret, op_errno);
887953
+                return 0;
887953
+        }
887953
+        main_local->int_inodelk.acquired_lock = _gf_true;
887953
+        shard_post_inodelk_fop_handler (main_frame, this);
887953
         return 0;
887953
 }
887953
 
887953
 int
887953
-shard_post_rename_lookup_handler (call_frame_t *frame, xlator_t *this)
887953
+shard_acquire_inodelk (call_frame_t *frame, xlator_t *this, loc_t *loc)
887953
+{
887953
+        call_frame_t     *lk_frame  = NULL;
887953
+        shard_local_t    *local     = frame->local; /* needed at err: */
887953
+        shard_local_t    *lk_local  = NULL;
887953
+        shard_inodelk_t  *int_inodelk  = NULL;
887953
+
887953
+        lk_frame = create_frame (this, this->ctx->pool);
887953
+        if (!lk_frame) {
887953
+                gf_msg (this->name, GF_LOG_WARNING, ENOMEM,
887953
+                        SHARD_MSG_MEMALLOC_FAILED, "Failed to create new frame "
887953
+                        "to lock base shard");
887953
+                goto err;
887953
+        }
887953
+        lk_local = mem_get0 (this->local_pool);
887953
+        if (!lk_local) {
887953
+                STACK_DESTROY (lk_frame->root);
887953
+                goto err;
887953
+        }
887953
+
887953
+        /* Attach lk_local and remember the originating fop frame. */
887953
+        lk_frame->local = lk_local;
887953
+        lk_local->main_frame = frame;
887953
+        int_inodelk = &lk_local->int_inodelk;
887953
+
887953
+        int_inodelk->flock.l_len = 0;
887953
+        int_inodelk->flock.l_start = 0;
887953
+        int_inodelk->domain = this->name;
887953
+        int_inodelk->flock.l_type = F_WRLCK;
887953
+        loc_copy (&local->int_inodelk.loc, loc);
887953
+        set_lk_owner_from_ptr (&lk_frame->root->lk_owner, lk_frame->root);
887953
+        local->inodelk_frame = lk_frame;
887953
+
887953
+        STACK_WIND (lk_frame, shard_acquire_inodelk_cbk, FIRST_CHILD(this),
887953
+                    FIRST_CHILD(this)->fops->inodelk, int_inodelk->domain,
887953
+                    &local->int_inodelk.loc, F_SETLKW, &int_inodelk->flock, NULL);
887953
+        return 0;
887953
+err:
887953
+        shard_common_failure_unwind (local->fop, frame, -1, ENOMEM);
887953
+        return 0;
887953
+}
887953
+
887953
+int
887953
+shard_post_mkdir_rm_handler (call_frame_t *frame, xlator_t *this)
887953
 {
887953
+        loc_t         *loc   = NULL;
887953
         shard_local_t *local = NULL;
887953
 
887953
         local = frame->local;
887953
 
887953
         if (local->op_ret < 0) {
887953
-                SHARD_STACK_UNWIND (rename, frame, local->op_ret,
887953
-                                    local->op_errno, NULL, NULL, NULL, NULL,
887953
-                                    NULL, NULL);
887953
+                shard_common_failure_unwind (local->fop, frame, -1,
887953
+                                             local->op_errno);
887953
                 return 0;
887953
         }
887953
+        if (local->fop == GF_FOP_UNLINK)
887953
+                loc = &local->loc;
887953
+        else if (local->fop == GF_FOP_RENAME)
887953
+                loc = &local->loc2;
887953
+        shard_acquire_inodelk (frame, this, loc);
887953
+        return 0;
887953
+}
887953
 
887953
-        if (local->dst_block_size)
887953
-                shard_rename_unlink_dst_shards_do (frame, this);
887953
-        else
887953
-                shard_rename_cbk (frame, this);
887953
+int
887953
+shard_mkdir_internal_dir (call_frame_t *frame, xlator_t *this,
887953
+                          shard_post_resolve_fop_handler_t handler,
887953
+                          shard_internal_dir_type_t type);
887953
+int
887953
+shard_pre_mkdir_rm_handler (call_frame_t *frame, xlator_t *this)
887953
+{
887953
+        shard_local_t *local = NULL;
887953
+
887953
+        local = frame->local;
887953
 
887953
+        if (local->op_ret < 0) {
887953
+                shard_common_failure_unwind (local->fop, frame, -1,
887953
+                                             local->op_errno);
887953
+                return 0;
887953
+        }
887953
+        shard_mkdir_internal_dir (frame, this, shard_post_mkdir_rm_handler,
887953
+                                  SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME);
887953
+        return 0;
887953
+}
887953
+
887953
+void
887953
+shard_begin_rm_resolution (call_frame_t *frame, xlator_t *this)
887953
+{
887953
+        shard_priv_t  *priv  = NULL;
887953
+        shard_local_t *local = NULL;
887953
+
887953
+        priv = this->private;
887953
+        local = frame->local;
887953
+
887953
+        local->dot_shard_rm_loc.inode = inode_find (this->itable,
887953
+                                                    priv->dot_shard_rm_gfid);
887953
+        if (!local->dot_shard_rm_loc.inode) {
887953
+                local->dot_shard_loc.inode = inode_find (this->itable,
887953
+                                                         priv->dot_shard_gfid);
887953
+                if (!local->dot_shard_loc.inode) {
887953
+                        shard_mkdir_internal_dir (frame, this,
887953
+                                                  shard_pre_mkdir_rm_handler,
887953
+                                                  SHARD_INTERNAL_DIR_DOT_SHARD);
887953
+                } else {
887953
+                        local->post_res_handler = shard_pre_mkdir_rm_handler;
887953
+                        shard_refresh_internal_dir (frame, this,
887953
+                                                    SHARD_INTERNAL_DIR_DOT_SHARD);
887953
+                }
887953
+        } else {
887953
+                local->post_res_handler = shard_post_mkdir_rm_handler;
887953
+                shard_refresh_internal_dir (frame, this,
887953
+                                        SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME);
887953
+        }
887953
+}
887953
+
887953
+int
887953
+shard_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
887953
+              dict_t *xdata)
887953
+{
887953
+        int             ret        = -1;
887953
+        uint64_t        block_size = 0;
887953
+        shard_local_t  *local      = NULL;
887953
+
887953
+        ret = shard_inode_ctx_get_block_size (loc->inode, this, &block_size);
887953
+        if ((ret) && (!IA_ISLNK(loc->inode->ia_type))) {
887953
+                gf_msg (this->name, GF_LOG_ERROR, 0,
887953
+                        SHARD_MSG_INODE_CTX_GET_FAILED, "Failed to get block "
887953
+                        "size from inode ctx of %s",
887953
+                        uuid_utoa (loc->inode->gfid));
887953
+                goto err;
887953
+        }
887953
+
887953
+        if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) {
887953
+                STACK_WIND (frame, default_unlink_cbk, FIRST_CHILD(this),
887953
+                            FIRST_CHILD(this)->fops->unlink, loc, xflag, xdata);
887953
+                return 0;
887953
+        }
887953
+
887953
+        local = mem_get0 (this->local_pool);
887953
+        if (!local)
887953
+                goto err;
887953
+
887953
+        frame->local = local;
887953
+
887953
+        loc_copy (&local->loc, loc);
887953
+        local->xflag = xflag;
887953
+        local->xattr_req = (xdata) ? dict_ref (xdata) : dict_new ();
887953
+        local->block_size = block_size;
887953
+        local->resolver_base_inode = loc->inode;
887953
+        local->fop = GF_FOP_UNLINK;
887953
+        if (!this->itable)
887953
+                this->itable = (local->loc.inode)->table;
887953
+
887953
+        local->resolve_not = _gf_true;
887953
+        shard_begin_rm_resolution (frame, this);
887953
+        return 0;
887953
+err:
887953
+        shard_common_failure_unwind (GF_FOP_UNLINK, frame, -1, ENOMEM);
887953
+        return 0;
887953
+}
887953
+
887953
+int
887953
+shard_rename_cbk (call_frame_t *frame, xlator_t *this)
887953
+{
887953
+        shard_local_t *local = NULL;
887953
+
887953
+        local = frame->local;
887953
+
887953
+        SHARD_STACK_UNWIND (rename, frame, local->op_ret, local->op_errno,
887953
+                            &local->prebuf, &local->preoldparent,
887953
+                            &local->postoldparent, &local->prenewparent,
887953
+                            &local->postnewparent, local->xattr_rsp);
887953
+        return 0;
887953
+}
887953
+
887953
+int
887953
+shard_post_rename_lookup_handler (call_frame_t *frame, xlator_t *this)
887953
+{
887953
+        shard_rename_cbk (frame, this);
887953
         return 0;
887953
 }
887953
 
887953
@@ -3226,6 +3815,7 @@ shard_rename_src_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
887953
                       struct iatt *prenewparent, struct iatt *postnewparent,
887953
                       dict_t *xdata)
887953
 {
887953
+        int            ret   = 0;
887953
         shard_local_t *local = NULL;
887953
 
887953
         local = frame->local;
887953
@@ -3235,6 +3825,11 @@ shard_rename_src_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
887953
                 local->op_errno = op_errno;
887953
                 goto err;
887953
         }
887953
+        /* Set ctx->refresh to TRUE to force a lookup on disk when
887953
+         * shard_lookup_base_file() is called next to refresh the hard link
887953
+         * count in ctx
887953
+         */
887953
+        shard_inode_ctx_set_refresh_flag (local->int_inodelk.loc.inode, this);
887953
 
887953
         local->prebuf = *buf;
887953
         local->preoldparent = *preoldparent;
887953
@@ -3244,40 +3839,37 @@ shard_rename_src_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
887953
         if (xdata)
887953
                 local->xattr_rsp = dict_ref (xdata);
887953
 
887953
-        /* Now the base file is looked up to gather the ia_size and ia_blocks.*/
887953
+        if (local->dst_block_size) {
887953
+                if (local->entrylk_frame) {
887953
+                        ret = shard_unlock_entrylk (frame, this);
887953
+                        if (ret < 0) {
887953
+                                local->op_ret = -1;
887953
+                                local->op_errno = -ret;
887953
+                        }
887953
+                }
887953
 
887953
+                ret = shard_unlock_inodelk (frame, this);
887953
+                if (ret < 0) {
887953
+                        local->op_ret = -1;
887953
+                        local->op_errno = -ret;
887953
+                        goto err;
887953
+                }
887953
+        }
887953
+
887953
+        /* Now the base file of src, if sharded, is looked up to gather ia_size
887953
+         * and ia_blocks.*/
887953
         if (local->block_size) {
887953
                 local->tmp_loc.inode = inode_new (this->itable);
887953
                 gf_uuid_copy (local->tmp_loc.gfid, (local->loc.inode)->gfid);
887953
                 shard_lookup_base_file (frame, this, &local->tmp_loc,
887953
                                         shard_post_rename_lookup_handler);
887953
         } else {
887953
-                shard_rename_unlink_dst_shards_do (frame, this);
887953
+                shard_rename_cbk (frame, this);
887953
         }
887953
-
887953
         return 0;
887953
 err:
887953
-        SHARD_STACK_UNWIND (rename, frame, local->op_ret, local->op_errno, NULL,
887953
-                            NULL, NULL, NULL, NULL, NULL);
887953
-        return 0;
887953
-}
887953
-
887953
-int
887953
-shard_rename_src_base_file (call_frame_t *frame, xlator_t *this)
887953
-{
887953
-        shard_local_t *local = NULL;
887953
-
887953
-        local = frame->local;
887953
-
887953
-        if (dict_set_uint32 (local->xattr_req, GET_LINK_COUNT, 0))
887953
-                gf_msg (this->name, GF_LOG_WARNING, 0,
887953
-                        SHARD_MSG_DICT_SET_FAILED, "Failed to set "
887953
-                        GET_LINK_COUNT" in dict");
887953
-
887953
-        /* To-Do: Request open-fd count on dst base file */
887953
-        STACK_WIND (frame, shard_rename_src_cbk, FIRST_CHILD(this),
887953
-                    FIRST_CHILD(this)->fops->rename, &local->loc, &local->loc2,
887953
-                    local->xattr_req);
887953
+        shard_common_failure_unwind (local->fop, frame, local->op_ret,
887953
+                                     local->op_errno);
887953
         return 0;
887953
 }
887953
 
887953
@@ -3289,9 +3881,8 @@ shard_post_lookup_dst_base_file_handler (call_frame_t *frame, xlator_t *this)
887953
         local = frame->local;
887953
 
887953
         if (local->op_ret < 0) {
887953
-                SHARD_STACK_UNWIND (rename, frame, local->op_ret,
887953
-                                    local->op_errno, NULL, NULL, NULL, NULL,
887953
-                                    NULL, NULL);
887953
+                shard_common_failure_unwind (local->fop, frame, local->op_ret,
887953
+                                             local->op_errno);
887953
                 return 0;
887953
         }
887953
 
887953
@@ -3332,6 +3923,7 @@ shard_rename (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
887953
         if (newloc->inode)
887953
                 ret = shard_inode_ctx_get_block_size (newloc->inode, this,
887953
                                                       &dst_block_size);
887953
+
887953
         /* The following stack_wind covers the case where:
887953
          * a. the src file is not sharded and dst doesn't exist, OR
887953
          * b. the src and dst both exist but are not sharded.
887953
@@ -3361,26 +3953,26 @@ shard_rename (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
887953
         local->dst_block_size = dst_block_size;
887953
         if (!this->itable)
887953
                 this->itable = (local->loc.inode)->table;
887953
+        local->resolve_not = _gf_true;
887953
 
887953
-        if (local->dst_block_size)
887953
-                /* The if block covers the case where the dst file exists and is
887953
-                 * sharded. So it is important to look up this inode, record its
887953
-                 * size, before renaming src to dst, so as to NOT lose this
887953
-                 * information.
887953
-                 */
887953
-                shard_lookup_base_file (frame, this, &local->loc2,
887953
-                                       shard_post_lookup_dst_base_file_handler);
887953
-        else
887953
-                /* The following block covers the case where the dst either
887953
-                 * doesn't exist or is NOT sharded. In this case, shard xlator
887953
-                 * would go ahead and rename src to dst.
887953
-                 */
887953
+        /* The following if-block covers the case where the dst file exists
887953
+         * and is sharded.
887953
+         */
887953
+        if (local->dst_block_size) {
887953
+                shard_begin_rm_resolution (frame, this);
887953
+        } else {
887953
+        /* The following block covers the case where the dst either doesn't
887953
+         * exist or is NOT sharded but the src is sharded. In this case, shard
887953
+         * xlator would go ahead and rename src to dst. Once done, it would also
887953
+         * lookup the base shard of src to get the ia_size and ia_blocks xattr
887953
+         * values.
887953
+         */
887953
                 shard_rename_src_base_file (frame, this);
887953
+        }
887953
         return 0;
887953
 
887953
 err:
887953
-        SHARD_STACK_UNWIND (rename, frame, -1, ENOMEM, NULL, NULL, NULL,
887953
-                             NULL, NULL, NULL);
887953
+        shard_common_failure_unwind (GF_FOP_RENAME, frame, -1, ENOMEM);
887953
         return 0;
887953
 
887953
 }
887953
@@ -3400,8 +3992,8 @@ shard_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
887953
         if (op_ret == -1)
887953
                 goto unwind;
887953
 
887953
-        ret = shard_inode_ctx_set (inode, this, stbuf,
887953
-                                   ntoh64 (local->block_size), SHARD_ALL_MASK);
887953
+        ret = shard_inode_ctx_set (inode, this, stbuf, local->block_size,
887953
+                                   SHARD_ALL_MASK);
887953
         if (ret)
887953
                 gf_msg (this->name, GF_LOG_WARNING, 0,
887953
                         SHARD_MSG_INODE_CTX_SET_FAILED, "Failed to set inode "
887953
@@ -3417,28 +4009,29 @@ int
887953
 shard_create (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
887953
               mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata)
887953
 {
887953
+        shard_priv_t   *priv       = NULL;
887953
         shard_local_t  *local      = NULL;
887953
 
887953
+        priv = this->private;
887953
         local = mem_get0 (this->local_pool);
887953
         if (!local)
887953
                 goto err;
887953
 
887953
         frame->local = local;
887953
+        local->block_size = priv->block_size;
887953
 
887953
         if (!__is_gsyncd_on_shard_dir (frame, loc)) {
887953
-                SHARD_INODE_CREATE_INIT (this, local, xdata, loc, err);
887953
+                SHARD_INODE_CREATE_INIT (this, local->block_size, xdata, loc, 0,
887953
+                                         0, err);
887953
         }
887953
 
887953
         STACK_WIND (frame, shard_create_cbk, FIRST_CHILD (this),
887953
                     FIRST_CHILD(this)->fops->create, loc, flags, mode, umask,
887953
                     fd, xdata);
887953
         return 0;
887953
-
887953
 err:
887953
-        SHARD_STACK_UNWIND (create, frame, -1, ENOMEM, NULL, NULL, NULL,
887953
-                             NULL, NULL, NULL);
887953
+        shard_common_failure_unwind (GF_FOP_CREATE, frame, -1, ENOMEM);
887953
         return 0;
887953
-
887953
 }
887953
 
887953
 int
887953
@@ -3523,9 +4116,9 @@ out:
887953
         if (call_count == 0) {
887953
                 SHARD_UNSET_ROOT_FS_ID (frame, local);
887953
                 if (local->op_ret < 0) {
887953
-                        SHARD_STACK_UNWIND (readv, frame, local->op_ret,
887953
-                                            local->op_errno, NULL, 0, NULL,
887953
-                                            NULL, NULL);
887953
+                        shard_common_failure_unwind (GF_FOP_READ, frame,
887953
+                                                     local->op_ret,
887953
+                                                     local->op_errno);
887953
                 } else {
887953
                         if (xdata)
887953
                                 local->xattr_rsp = dict_ref (xdata);
887953
@@ -3792,8 +4385,8 @@ shard_post_lookup_shards_readv_handler (call_frame_t *frame, xlator_t *this)
887953
         local = frame->local;
887953
 
887953
         if (local->op_ret < 0) {
887953
-                SHARD_STACK_UNWIND (readv, frame, local->op_ret,
887953
-                                    local->op_errno, NULL, 0, NULL, NULL, NULL);
887953
+                shard_common_failure_unwind (GF_FOP_READ, frame, local->op_ret,
887953
+                                             local->op_errno);
887953
                 return 0;
887953
         }
887953
 
887953
@@ -3815,8 +4408,8 @@ shard_post_mknod_readv_handler (call_frame_t *frame, xlator_t *this)
887953
         local = frame->local;
887953
 
887953
         if (local->op_ret < 0) {
887953
-                SHARD_STACK_UNWIND (readv, frame, local->op_ret,
887953
-                                    local->op_errno, NULL, 0, NULL, NULL, NULL);
887953
+                shard_common_failure_unwind (GF_FOP_READ, frame, local->op_ret,
887953
+                                             local->op_errno);
887953
                 return 0;
887953
         }
887953
 
887953
@@ -3839,9 +4432,9 @@ shard_post_resolve_readv_handler (call_frame_t *frame, xlator_t *this)
887953
 
887953
         if (local->op_ret < 0) {
887953
                 if (local->op_errno != ENOENT) {
887953
-                        SHARD_STACK_UNWIND (readv, frame, local->op_ret,
887953
-                                            local->op_errno, NULL, 0, NULL,
887953
-                                            NULL, NULL);
887953
+                        shard_common_failure_unwind (GF_FOP_READ, frame,
887953
+                                                     local->op_ret,
887953
+                                                     local->op_errno);
887953
                         return 0;
887953
                 } else {
887953
                         struct iovec vec = {0,};
887953
@@ -3878,8 +4471,8 @@ shard_post_lookup_readv_handler (call_frame_t *frame, xlator_t *this)
887953
         local = frame->local;
887953
 
887953
         if (local->op_ret < 0) {
887953
-                SHARD_STACK_UNWIND (readv, frame, local->op_ret,
887953
-                                    local->op_errno, NULL, 0, NULL, NULL, NULL);
887953
+                shard_common_failure_unwind (GF_FOP_READ, frame, local->op_ret,
887953
+                                             local->op_errno);
887953
                 return 0;
887953
         }
887953
 
887953
@@ -3955,10 +4548,8 @@ shard_post_lookup_readv_handler (call_frame_t *frame, xlator_t *this)
887953
                                             SHARD_INTERNAL_DIR_DOT_SHARD);
887953
         }
887953
         return 0;
887953
-
887953
 err:
887953
-        SHARD_STACK_UNWIND (readv, frame, -1, ENOMEM, NULL, 0, NULL, NULL,
887953
-                            NULL);
887953
+        shard_common_failure_unwind (GF_FOP_READ, frame, -1, ENOMEM);
887953
         return 0;
887953
 }
887953
 
887953
@@ -4018,8 +4609,7 @@ shard_readv (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
887953
                                 shard_post_lookup_readv_handler);
887953
         return 0;
887953
 err:
887953
-        SHARD_STACK_UNWIND (readv, frame, -1, ENOMEM, NULL, 0, NULL, NULL,
887953
-                            NULL);
887953
+        shard_common_failure_unwind (GF_FOP_READ, frame, -1, ENOMEM);
887953
         return 0;
887953
 }
887953
 
887953
@@ -4032,9 +4622,8 @@ shard_common_inode_write_post_update_size_handler (call_frame_t *frame,
887953
         local = frame->local;
887953
 
887953
         if (local->op_ret < 0) {
887953
-                shard_common_inode_write_failure_unwind (local->fop, frame,
887953
-                                                         local->op_ret,
887953
-                                                         local->op_errno);
887953
+                shard_common_failure_unwind (local->fop, frame, local->op_ret,
887953
+                                             local->op_errno);
887953
         } else {
887953
                 shard_common_inode_write_success_unwind (local->fop, frame,
887953
                                                          local->written_size);
887953
@@ -4139,9 +4728,8 @@ shard_common_inode_write_do_cbk (call_frame_t *frame, void *cookie,
887953
         if (call_count == 0) {
887953
                 SHARD_UNSET_ROOT_FS_ID (frame, local);
887953
                 if (local->op_ret < 0) {
887953
-                        shard_common_inode_write_failure_unwind (fop, frame,
887953
-                                                                 local->op_ret,
887953
-                                                               local->op_errno);
887953
+                        shard_common_failure_unwind (fop, frame, local->op_ret,
887953
+                                                     local->op_errno);
887953
                 } else {
887953
                         shard_get_delta_size_from_inode_ctx (local,
887953
                                                              local->fd->inode,
887953
@@ -4343,9 +4931,8 @@ shard_common_inode_write_post_lookup_shards_handler (call_frame_t *frame,
887953
         local = frame->local;
887953
 
887953
         if (local->op_ret < 0) {
887953
-                shard_common_inode_write_failure_unwind (local->fop, frame,
887953
-                                                         local->op_ret,
887953
-                                                         local->op_errno);
887953
+                shard_common_failure_unwind (local->fop, frame, local->op_ret,
887953
+                                             local->op_errno);
887953
                 return 0;
887953
         }
887953
 
887953
@@ -4368,9 +4955,8 @@ shard_common_inode_write_post_mknod_handler (call_frame_t *frame,
887953
         local = frame->local;
887953
 
887953
         if (local->op_ret < 0) {
887953
-                shard_common_inode_write_failure_unwind (local->fop, frame,
887953
-                                                         local->op_ret,
887953
-                                                         local->op_errno);
887953
+                shard_common_failure_unwind (local->fop, frame, local->op_ret,
887953
+                                             local->op_errno);
887953
                 return 0;
887953
         }
887953
 
887953
@@ -4386,10 +4972,6 @@ shard_common_inode_write_post_mknod_handler (call_frame_t *frame,
887953
 }
887953
 
887953
 int
887953
-shard_mkdir_internal_dir (call_frame_t *frame, xlator_t *this,
887953
-                          shard_post_resolve_fop_handler_t handler,
887953
-                          shard_internal_dir_type_t type);
887953
-int
887953
 shard_common_inode_write_post_resolve_handler (call_frame_t *frame,
887953
                                                xlator_t *this)
887953
 {
887953
@@ -4398,9 +4980,8 @@ shard_common_inode_write_post_resolve_handler (call_frame_t *frame,
887953
         local = frame->local;
887953
 
887953
         if (local->op_ret < 0) {
887953
-                shard_common_inode_write_failure_unwind (local->fop, frame,
887953
-                                                         local->op_ret,
887953
-                                                         local->op_errno);
887953
+                shard_common_failure_unwind (local->fop, frame, local->op_ret,
887953
+                                             local->op_errno);
887953
                 return 0;
887953
         }
887953
 
887953
@@ -4423,9 +5004,8 @@ shard_common_inode_write_post_lookup_handler (call_frame_t *frame,
887953
         shard_priv_t  *priv  = this->private;
887953
 
887953
         if (local->op_ret < 0) {
887953
-                shard_common_inode_write_failure_unwind (local->fop, frame,
887953
-                                                         local->op_ret,
887953
-                                                         local->op_errno);
887953
+                shard_common_failure_unwind (local->fop, frame, local->op_ret,
887953
+                                             local->op_errno);
887953
                 return 0;
887953
         }
887953
 
887953
@@ -4443,8 +5023,7 @@ shard_common_inode_write_post_lookup_handler (call_frame_t *frame,
887953
         local->inode_list = GF_CALLOC (local->num_blocks, sizeof (inode_t *),
887953
                                        gf_shard_mt_inode_list);
887953
         if (!local->inode_list) {
887953
-                shard_common_inode_write_failure_unwind (local->fop, frame,
887953
-                                                         -1, ENOMEM);
887953
+                shard_common_failure_unwind (local->fop, frame, -1, ENOMEM);
887953
                 return 0;
887953
         }
887953
 
887953
@@ -4508,7 +5087,7 @@ shard_mkdir_internal_dir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
887953
         if (link_inode != inode) {
887953
                 shard_refresh_internal_dir (frame, this, type);
887953
         } else {
887953
-                shard_inode_ctx_set_refreshed_flag (link_inode, this);
887953
+                shard_inode_ctx_mark_dir_refreshed (link_inode, this);
887953
                 shard_common_resolve_shards (frame, this,
887953
                                              local->post_res_handler);
887953
         }
887953
@@ -4544,6 +5123,10 @@ shard_mkdir_internal_dir (call_frame_t *frame, xlator_t *this,
887953
                 gf_uuid_copy (*gfid, priv->dot_shard_gfid);
887953
                 loc = &local->dot_shard_loc;
887953
                 break;
887953
+        case SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME:
887953
+                gf_uuid_copy (*gfid, priv->dot_shard_rm_gfid);
887953
+                loc = &local->dot_shard_rm_loc;
887953
+                break;
887953
         default:
887953
                 break;
887953
         }
887953
@@ -4702,8 +5285,8 @@ out:
887953
                 return 0;
887953
 
887953
         if (local->op_ret < 0) {
887953
-                SHARD_STACK_UNWIND (fsync, frame, local->op_ret,
887953
-                                    local->op_errno, NULL, NULL, NULL);
887953
+                shard_common_failure_unwind (GF_FOP_FSYNC, frame, local->op_ret,
887953
+                                             local->op_errno);
887953
         } else {
887953
                 shard_get_timestamps_from_inode_ctx (local, base_inode, this);
887953
                 SHARD_STACK_UNWIND (fsync, frame, local->op_ret,
887953
@@ -4733,8 +5316,8 @@ shard_post_lookup_fsync_handler (call_frame_t *frame, xlator_t *this)
887953
         INIT_LIST_HEAD (&copy);
887953
 
887953
         if (local->op_ret < 0) {
887953
-                SHARD_STACK_UNWIND (fsync, frame, local->op_ret,
887953
-                                    local->op_errno, NULL, NULL, NULL);
887953
+                shard_common_failure_unwind (GF_FOP_FSYNC, frame, local->op_ret,
887953
+                                             local->op_errno);
887953
                 return 0;
887953
         }
887953
 
887953
@@ -4847,7 +5430,7 @@ shard_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync,
887953
                                 shard_post_lookup_fsync_handler);
887953
         return 0;
887953
 err:
887953
-        SHARD_STACK_UNWIND (fsync, frame, -1, ENOMEM, NULL, NULL, NULL);
887953
+        shard_common_failure_unwind (GF_FOP_FSYNC, frame, -1, ENOMEM);
887953
         return 0;
887953
 }
887953
 
887953
@@ -5069,9 +5652,8 @@ shard_removexattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
887953
                          FIRST_CHILD(this)->fops->removexattr, loc, name,
887953
                          xdata);
887953
         return 0;
887953
-
887953
 out:
887953
-        SHARD_STACK_UNWIND (removexattr, frame, -1, op_errno, NULL);
887953
+        shard_common_failure_unwind (GF_FOP_REMOVEXATTR, frame, -1, op_errno);
887953
         return 0;
887953
 }
887953
 
887953
@@ -5095,9 +5677,8 @@ shard_fremovexattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
887953
                          FIRST_CHILD(this)->fops->fremovexattr, fd, name,
887953
                          xdata);
887953
         return 0;
887953
-
887953
 out:
887953
-        SHARD_STACK_UNWIND (fremovexattr, frame, -1, op_errno, NULL);
887953
+        shard_common_failure_unwind (GF_FOP_FREMOVEXATTR, frame, -1, op_errno);
887953
         return 0;
887953
 }
887953
 
887953
@@ -5135,9 +5716,8 @@ shard_fgetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
887953
         STACK_WIND (frame, shard_fgetxattr_cbk, FIRST_CHILD(this),
887953
                     FIRST_CHILD(this)->fops->fgetxattr, fd, name, xdata);
887953
         return 0;
887953
-
887953
 out:
887953
-        SHARD_STACK_UNWIND (fgetxattr, frame, -1, op_errno, NULL, NULL);
887953
+        shard_common_failure_unwind (GF_FOP_FGETXATTR, frame, -1, op_errno);
887953
         return 0;
887953
 }
887953
 
887953
@@ -5176,9 +5756,8 @@ shard_getxattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
887953
         STACK_WIND (frame, shard_getxattr_cbk, FIRST_CHILD(this),
887953
                     FIRST_CHILD(this)->fops->getxattr, loc, name, xdata);
887953
         return 0;
887953
-
887953
 out:
887953
-        SHARD_STACK_UNWIND (getxattr, frame, -1, op_errno, NULL, NULL);
887953
+        shard_common_failure_unwind (GF_FOP_GETXATTR, frame, -1, op_errno);
887953
         return 0;
887953
 }
887953
 
887953
@@ -5197,9 +5776,8 @@ shard_fsetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict,
887953
                          FIRST_CHILD(this)->fops->fsetxattr, fd, dict, flags,
887953
                          xdata);
887953
         return 0;
887953
-
887953
 out:
887953
-        SHARD_STACK_UNWIND (fsetxattr, frame, -1, op_errno, NULL);
887953
+        shard_common_failure_unwind (GF_FOP_FSETXATTR, frame, -1, op_errno);
887953
         return 0;
887953
 }
887953
 
887953
@@ -5218,9 +5796,8 @@ shard_setxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
887953
                          FIRST_CHILD(this)->fops->setxattr, loc, dict, flags,
887953
                          xdata);
887953
         return 0;
887953
-
887953
 out:
887953
-        SHARD_STACK_UNWIND (setxattr, frame, -1, op_errno, NULL);
887953
+        shard_common_failure_unwind (GF_FOP_SETXATTR, frame, -1, op_errno);
887953
         return 0;
887953
 }
887953
 
887953
@@ -5335,11 +5912,9 @@ shard_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
887953
         STACK_WIND (frame, shard_common_setattr_cbk, FIRST_CHILD(this),
887953
                     FIRST_CHILD(this)->fops->setattr, loc, stbuf, valid,
887953
                     local->xattr_req);
887953
-
887953
         return 0;
887953
-
887953
 err:
887953
-        SHARD_STACK_UNWIND (setattr, frame, -1, ENOMEM, NULL, NULL, NULL);
887953
+        shard_common_failure_unwind (GF_FOP_SETATTR, frame, -1, ENOMEM);
887953
         return 0;
887953
 }
887953
 
887953
@@ -5398,9 +5973,8 @@ shard_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
887953
                     FIRST_CHILD(this)->fops->fsetattr, fd, stbuf, valid,
887953
                     local->xattr_req);
887953
         return 0;
887953
-
887953
 err:
887953
-        SHARD_STACK_UNWIND (fsetattr, frame, -1, ENOMEM, NULL, NULL, NULL);
887953
+        shard_common_failure_unwind (GF_FOP_FSETATTR, frame, -1, ENOMEM);
887953
         return 0;
887953
 }
887953
 
887953
@@ -5502,7 +6076,7 @@ shard_common_inode_write_begin (call_frame_t *frame, xlator_t *this,
887953
                                 shard_common_inode_write_post_lookup_handler);
887953
         return 0;
887953
 out:
887953
-        shard_common_inode_write_failure_unwind (fop, frame, -1, ENOMEM);
887953
+        shard_common_failure_unwind (fop, frame, -1, ENOMEM);
887953
         return 0;
887953
 }
887953
 
887953
@@ -5527,9 +6101,8 @@ shard_fallocate (call_frame_t *frame, xlator_t *this, fd_t *fd,
887953
         shard_common_inode_write_begin (frame, this, GF_FOP_FALLOCATE, fd, NULL,
887953
                                         0, offset, keep_size, len, NULL, xdata);
887953
         return 0;
887953
-
887953
 out:
887953
-        SHARD_STACK_UNWIND (fallocate, frame, -1, ENOTSUP, NULL, NULL, NULL);
887953
+        shard_common_failure_unwind (GF_FOP_FALLOCATE, frame, -1, ENOTSUP);
887953
         return 0;
887953
 }
887953
 
887953
@@ -5558,7 +6131,7 @@ shard_seek (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
887953
         /* TBD */
887953
         gf_msg (this->name, GF_LOG_INFO, ENOTSUP, SHARD_MSG_FOP_NOT_SUPPORTED,
887953
                 "seek called on %s.", uuid_utoa (fd->inode->gfid));
887953
-        SHARD_STACK_UNWIND (seek, frame, -1, ENOTSUP, 0, NULL);
887953
+        shard_common_failure_unwind (GF_FOP_SEEK, frame, -1, ENOTSUP);
887953
         return 0;
887953
 }
887953
 
887953
@@ -5619,6 +6192,7 @@ init (xlator_t *this)
887953
                 goto out;
887953
         }
887953
         gf_uuid_parse (SHARD_ROOT_GFID, priv->dot_shard_gfid);
887953
+        gf_uuid_parse (DOT_SHARD_REMOVE_ME_GFID, priv->dot_shard_rm_gfid);
887953
 
887953
         this->private = priv;
887953
         LOCK_INIT (&priv->lock);
887953
diff --git a/xlators/features/shard/src/shard.h b/xlators/features/shard/src/shard.h
887953
index 225caa0..1783ff6 100644
887953
--- a/xlators/features/shard/src/shard.h
887953
+++ b/xlators/features/shard/src/shard.h
887953
@@ -18,6 +18,7 @@
887953
 #include "syncop.h"
887953
 
887953
 #define GF_SHARD_DIR ".shard"
887953
+#define GF_SHARD_REMOVE_ME_DIR ".remove_me" /* i.e. .shard/.remove_me */
887953
 #define SHARD_MIN_BLOCK_SIZE  (4 * GF_UNIT_MB)
887953
 #define SHARD_MAX_BLOCK_SIZE  (4 * GF_UNIT_TB)
887953
 #define SHARD_XATTR_PREFIX "trusted.glusterfs.shard."
887953
@@ -55,6 +56,12 @@
887953
 #define get_highest_block(off, len, shard_size) \
887953
         (((((off)+(len)) == 0)?0:((off)+(len)-1)) / (shard_size))
887953
 
887953
+int
887953
+shard_unlock_inodelk (call_frame_t *frame, xlator_t *this);
887953
+
887953
+int
887953
+shard_unlock_entrylk (call_frame_t *frame, xlator_t *this);
887953
+
887953
 #define SHARD_ENTRY_FOP_CHECK(loc, op_errno, label) do {               \
887953
         if ((loc->name && !strcmp (GF_SHARD_DIR, loc->name)) &&        \
887953
             (((loc->parent) &&                                          \
887953
@@ -79,39 +86,57 @@
887953
         }                                                              \
887953
 } while (0)
887953
 
887953
-#define SHARD_STACK_UNWIND(fop, frame, params ...) do {       \
887953
-        shard_local_t *__local = NULL;                         \
887953
-        if (frame) {                                           \
887953
-                __local = frame->local;                        \
887953
-                frame->local = NULL;                           \
887953
-        }                                                      \
887953
-        STACK_UNWIND_STRICT (fop, frame, params);              \
887953
-        if (__local) {                                         \
887953
-                shard_local_wipe (__local);                    \
887953
-                mem_put (__local);                             \
887953
-        }                                                      \
887953
+#define SHARD_STACK_UNWIND(fop, frame, params ...) do {            \
887953
+        shard_local_t *__local = NULL;                             \
887953
+        if (frame) { /* release internal locks, detach local */     \
887953
+                __local = frame->local;                            \
887953
+                if (__local && __local->int_inodelk.acquired_lock) \
887953
+                        shard_unlock_inodelk (frame, frame->this); \
887953
+                if (__local && __local->int_entrylk.acquired_lock) \
887953
+                        shard_unlock_entrylk (frame, frame->this); \
887953
+                frame->local = NULL;                               \
887953
+        }                                                          \
887953
+        STACK_UNWIND_STRICT (fop, frame, params);                  \
887953
+        if (__local) { /* free local after the unwind */            \
887953
+                shard_local_wipe (__local);                        \
887953
+                mem_put (__local);                                 \
887953
+        }                                                          \
887953
 } while (0)
887953
 
887953
+#define SHARD_STACK_DESTROY(frame)                                \
887953
+        do {                                                    \
887953
+                /* Detach local; wipe it after the destroy. */  \
887953
+                shard_local_t *__local = (frame)->local;        \
887953
+                (frame)->local = NULL;                          \
887953
+                STACK_DESTROY ((frame)->root);                  \
887953
+                if (__local) {                                  \
887953
+                        shard_local_wipe (__local);             \
887953
+                        mem_put (__local);                      \
887953
+                }                                               \
887953
+        } while (0)
887953
+
887953
 
887953
-#define SHARD_INODE_CREATE_INIT(this, local, xattr_req, loc, label) do {      \
887953
+#define SHARD_INODE_CREATE_INIT(this, block_size, xattr_req, loc, size,       \
887953
+                                block_count, label) do {                      \
887953
         int            __ret       = -1;                                      \
887953
         int64_t       *__size_attr = NULL;                                    \
887953
-        shard_priv_t  *__priv      = NULL;                                    \
887953
+        uint64_t      *__bs        = 0;                                       \
887953
                                                                               \
887953
-        __priv = this->private;                                               \
887953
-                                                                              \
887953
-        local->block_size = hton64 (__priv->block_size);                      \
887953
-        __ret = dict_set_static_bin (xattr_req, GF_XATTR_SHARD_BLOCK_SIZE,    \
887953
-                                     &local->block_size,                      \
887953
-                                     sizeof (local->block_size));             \
887953
+        __bs = GF_CALLOC (1, sizeof (uint64_t), gf_shard_mt_uint64_t);        \
887953
+        if (!__bs)                                                            \
887953
+                goto label;                                                   \
887953
+        *__bs = hton64 (block_size);                                          \
887953
+        __ret = dict_set_bin (xattr_req, GF_XATTR_SHARD_BLOCK_SIZE, __bs,     \
887953
+                              sizeof (*__bs));                                \
887953
         if (__ret) {                                                          \
887953
                 gf_msg (this->name, GF_LOG_WARNING, 0,                        \
887953
                         SHARD_MSG_DICT_SET_FAILED, "Failed to set key: %s "   \
887953
-                        "on path %s", GF_XATTR_SHARD_BLOCK_SIZE, loc->path);  \
887953
+                        "on path %s", GF_XATTR_SHARD_BLOCK_SIZE, (loc)->path);\
887953
+                GF_FREE (__bs);                                               \
887953
                 goto label;                                                   \
887953
         }                                                                     \
887953
                                                                               \
887953
-        __ret = shard_set_size_attrs (0, 0, &__size_attr);                    \
887953
+        __ret = shard_set_size_attrs (size, block_count, &__size_attr);       \
887953
         if (__ret)                                                            \
887953
                 goto label;                                                   \
887953
                                                                               \
887953
@@ -120,7 +145,7 @@
887953
         if (__ret) {                                                          \
887953
                 gf_msg (this->name, GF_LOG_WARNING, 0,                        \
887953
                         SHARD_MSG_DICT_SET_FAILED, "Failed to set key: %s "   \
887953
-                        "on path %s", GF_XATTR_SHARD_FILE_SIZE, loc->path);   \
887953
+                        "on path %s", GF_XATTR_SHARD_FILE_SIZE, (loc)->path);   \
887953
                 GF_FREE (__size_attr);                                        \
887953
                 goto label;                                                   \
887953
         }                                                                     \
887953
@@ -172,21 +197,34 @@
887953
                 }                                                             \
887953
         } while (0)
887953
 
887953
+/* "rm" in dot_shard_rm_* is short for "remove me" (.shard/.remove_me) */
887953
 
887953
 typedef struct shard_priv {
887953
         uint64_t block_size;
887953
         uuid_t dot_shard_gfid;
887953
+        uuid_t dot_shard_rm_gfid; /* parsed from DOT_SHARD_REMOVE_ME_GFID */
887953
         inode_t *dot_shard_inode;
887953
+        inode_t *dot_shard_rm_inode; /* presumably .remove_me inode */
887953
         gf_lock_t lock;
887953
         int inode_count;
887953
         struct list_head ilist_head;
887953
 } shard_priv_t;
887953
 
887953
 typedef struct {
887953
-        loc_t *loc;
887953
-        short type;
887953
+        loc_t loc;
887953
         char *domain;
887953
-} shard_lock_t;
887953
+        struct gf_flock flock;
887953
+        gf_boolean_t acquired_lock; /* tested in SHARD_STACK_UNWIND */
887953
+} shard_inodelk_t;
887953
+
887953
+typedef struct {
887953
+        loc_t loc;
887953
+        char *domain;
887953
+        char *basename;
887953
+        entrylk_cmd cmd;
887953
+        entrylk_type type;
887953
+        gf_boolean_t acquired_lock; /* tested in SHARD_STACK_UNWIND */
887953
+} shard_entrylk_t;
887953
 
887953
 typedef int32_t (*shard_post_fop_handler_t) (call_frame_t *frame,
887953
                                              xlator_t *this);
887953
@@ -200,6 +238,7 @@ typedef int32_t (*shard_post_mknod_fop_handler_t) (call_frame_t *frame,
887953
 
887953
 typedef int32_t (*shard_post_update_size_fop_handler_t) (call_frame_t *frame,
887953
                                                          xlator_t *this);
887953
+
887953
 typedef struct shard_local {
887953
         int op_ret;
887953
         int op_errno;
887953
@@ -227,6 +266,7 @@ typedef struct shard_local {
887953
         int delta_blocks;
887953
         loc_t loc;
887953
         loc_t dot_shard_loc;
887953
+        loc_t dot_shard_rm_loc;
887953
         loc_t loc2;
887953
         loc_t tmp_loc;
887953
         fd_t *fd;
887953
@@ -251,16 +291,18 @@ typedef struct shard_local {
887953
         shard_post_resolve_fop_handler_t post_res_handler;
887953
         shard_post_mknod_fop_handler_t post_mknod_handler;
887953
         shard_post_update_size_fop_handler_t post_update_size_handler;
887953
-        struct {
887953
-                int lock_count;
887953
-                fop_inodelk_cbk_t inodelk_cbk;
887953
-                shard_lock_t *shard_lock;
887953
-        } lock;
887953
+        shard_inodelk_t int_inodelk;
887953
+        shard_entrylk_t int_entrylk;
887953
         inode_t *resolver_base_inode;
887953
         gf_boolean_t first_lookup_done;
887953
         syncbarrier_t barrier;
887953
         gf_boolean_t lookup_shards_barriered;
887953
         gf_boolean_t unlink_shards_barriered;
887953
+        gf_boolean_t resolve_not;
887953
+        loc_t newloc;
887953
+        call_frame_t *main_frame;
887953
+        call_frame_t *inodelk_frame;
887953
+        call_frame_t *entrylk_frame;
887953
 } shard_local_t;
887953
 
887953
 typedef struct shard_inode_ctx {
887953
@@ -284,6 +326,7 @@ typedef struct shard_inode_ctx {
887953
 
887953
 typedef enum {
887953
         SHARD_INTERNAL_DIR_DOT_SHARD = 1,
887953
+        SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME, /* .shard/.remove_me */
887953
 } shard_internal_dir_type_t;
887953
 
887953
 #endif /* __SHARD_H__ */
887953
-- 
887953
1.8.3.1
887953