From b92aedc0b10d3c7b6150b8f18c950bf95494bc5f Mon Sep 17 00:00:00 2001 From: Krutika Dhananjay Date: Thu, 29 Mar 2018 17:21:32 +0530 Subject: [PATCH 429/444] features/shard: Introducing ".shard/.remove_me" for atomic shard deletion (part 1) > Upstream: https://review.gluster.org/19929 > BUG: 1568521 > Change-Id: Ia1d238b721a3e99f951a73abbe199e4245f51a3a PROBLEM: Shards are deleted synchronously when a sharded file is unlinked or when a sharded file participating as the dst in a rename() is going to be replaced. The problem with this approach is that it makes the operation really slow, sometimes causing the application to time out, especially with large files. SOLUTION: To make this operation atomic, we introduce a ".remove_me" directory. Now renames and unlinks will simply involve two steps: 1. creating an empty file under .remove_me named after the gfid of the file participating in unlink/rename 2. carrying out the actual rename/unlink A synctask is created (more on that in part 2) to scan this directory after every unlink/rename operation (or upon a volume mount) and clean up all shards associated with the gfids listed in it. All of this happens in the background. The task takes care to delete the shards associated with the gfid in .remove_me only if this gfid doesn't exist in the backend, ensuring that the file was successfully renamed/unlinked and its shards can now be safely discarded. 
Change-Id: Ia1d238b721a3e99f951a73abbe199e4245f51a3a BUG: 1520882 Signed-off-by: Krutika Dhananjay Reviewed-on: https://code.engineering.redhat.com/gerrit/154863 Tested-by: RHGS Build Bot Reviewed-by: Xavi Hernandez --- libglusterfs/src/common-utils.h | 1 + tests/bugs/shard/bug-1245547.t | 4 +- tests/bugs/shard/bug-1568521-EEXIST.t | 79 ++ tests/bugs/shard/bug-shard-discard.t | 16 +- tests/bugs/shard/shard-inode-refcount-test.t | 2 +- tests/bugs/shard/unlinks-and-renames.t | 118 ++- xlators/features/shard/src/shard-mem-types.h | 1 + xlators/features/shard/src/shard-messages.h | 9 +- xlators/features/shard/src/shard.c | 1384 ++++++++++++++++++-------- xlators/features/shard/src/shard.h | 103 +- 10 files changed, 1250 insertions(+), 467 deletions(-) create mode 100644 tests/bugs/shard/bug-1568521-EEXIST.t diff --git a/libglusterfs/src/common-utils.h b/libglusterfs/src/common-utils.h index e64dea3..c804ed5 100644 --- a/libglusterfs/src/common-utils.h +++ b/libglusterfs/src/common-utils.h @@ -121,6 +121,7 @@ void trap (void); /* Shard */ #define GF_XATTR_SHARD_FILE_SIZE "trusted.glusterfs.shard.file-size" #define SHARD_ROOT_GFID "be318638-e8a0-4c6d-977d-7a937aa84806" +#define DOT_SHARD_REMOVE_ME_GFID "77dd5a45-dbf5-4592-b31b-b440382302e9" /* Lease: buffer length for stringified lease id * Format: 4hexnum-4hexnum-4hexnum-4hexnum-4hexnum-4hexnum-4hexnum-4hexnum diff --git a/tests/bugs/shard/bug-1245547.t b/tests/bugs/shard/bug-1245547.t index c19b2a6..3c46785 100644 --- a/tests/bugs/shard/bug-1245547.t +++ b/tests/bugs/shard/bug-1245547.t @@ -25,11 +25,11 @@ TEST touch $M0/bar TEST truncate -s 10G $M0/bar #Unlink on such a file should succeed. TEST unlink $M0/bar -# + #Create a file 'baz' with holes. TEST touch $M0/baz TEST truncate -s 10G $M0/baz #Rename with a sharded existing dest that has holes must succeed. 
TEST mv -f $M0/foo $M0/baz -cleanup; +cleanup diff --git a/tests/bugs/shard/bug-1568521-EEXIST.t b/tests/bugs/shard/bug-1568521-EEXIST.t new file mode 100644 index 0000000..e4c3d41 --- /dev/null +++ b/tests/bugs/shard/bug-1568521-EEXIST.t @@ -0,0 +1,79 @@ +#!/bin/bash + +. $(dirname $0)/../../include.rc +. $(dirname $0)/../../volume.rc + +cleanup + +TEST glusterd +TEST pidof glusterd +TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1} +TEST $CLI volume set $V0 features.shard on +TEST $CLI volume set $V0 features.shard-block-size 4MB +TEST $CLI volume start $V0 +TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0 + +TEST mkdir $M0/dir +# Unlink a temporary file to trigger creation of .remove_me +TEST touch $M0/tmp +TEST unlink $M0/tmp + +TEST stat $B0/${V0}0/.shard/.remove_me +TEST stat $B0/${V0}1/.shard/.remove_me + +TEST dd if=/dev/zero of=$M0/dir/file bs=1024 count=9216 +gfid_file=$(get_gfid_string $M0/dir/file) + +# Create marker file from the backend to simulate ENODATA. +touch $B0/${V0}0/.shard/.remove_me/$gfid_file +touch $B0/${V0}1/.shard/.remove_me/$gfid_file + +# Set block and file size to incorrect values of 64MB and 5MB to simulate "stale xattrs" case +# and confirm that the correct values are set when the actual unlink takes place + +TEST setfattr -n trusted.glusterfs.shard.block-size -v 0x0000000004000000 $B0/${V0}0/.shard/.remove_me/$gfid_file +TEST setfattr -n trusted.glusterfs.shard.block-size -v 0x0000000004000000 $B0/${V0}1/.shard/.remove_me/$gfid_file + +TEST setfattr -n trusted.glusterfs.shard.file-size -v 0x0000000000500000000000000000000000000000000000000000000000000000 $B0/${V0}0/.shard/.remove_me/$gfid_file +TEST setfattr -n trusted.glusterfs.shard.file-size -v 0x0000000000500000000000000000000000000000000000000000000000000000 $B0/${V0}1/.shard/.remove_me/$gfid_file + +# Sleep for 2 seconds to prevent posix_gfid_heal() from believing marker file is "fresh" and failing lookup with ENOENT +sleep 2 + +TEST unlink $M0/dir/file +EXPECT 
"0000000000400000" get_hex_xattr trusted.glusterfs.shard.block-size $B0/${V0}0/.shard/.remove_me/$gfid_file +EXPECT "0000000000400000" get_hex_xattr trusted.glusterfs.shard.block-size $B0/${V0}1/.shard/.remove_me/$gfid_file +EXPECT "0000000000900000000000000000000000000000000000000000000000000000" get_hex_xattr trusted.glusterfs.shard.file-size $B0/${V0}0/.shard/.remove_me/$gfid_file +EXPECT "0000000000900000000000000000000000000000000000000000000000000000" get_hex_xattr trusted.glusterfs.shard.file-size $B0/${V0}1/.shard/.remove_me/$gfid_file + +############################## +### Repeat test for rename ### +############################## + +TEST touch $M0/src +TEST dd if=/dev/zero of=$M0/dir/dst bs=1024 count=9216 +gfid_dst=$(get_gfid_string $M0/dir/dst) + +# Create marker file from the backend to simulate ENODATA. +touch $B0/${V0}0/.shard/.remove_me/$gfid_dst +touch $B0/${V0}1/.shard/.remove_me/$gfid_dst + +# Set block and file size to incorrect values of 64MB and 5MB to simulate "stale xattrs" case +# and confirm that the correct values are set when the actual unlink takes place + +TEST setfattr -n trusted.glusterfs.shard.block-size -v 0x0000000004000000 $B0/${V0}0/.shard/.remove_me/$gfid_dst +TEST setfattr -n trusted.glusterfs.shard.block-size -v 0x0000000004000000 $B0/${V0}1/.shard/.remove_me/$gfid_dst + +TEST setfattr -n trusted.glusterfs.shard.file-size -v 0x0000000000500000000000000000000000000000000000000000000000000000 $B0/${V0}0/.shard/.remove_me/$gfid_dst +TEST setfattr -n trusted.glusterfs.shard.file-size -v 0x0000000000500000000000000000000000000000000000000000000000000000 $B0/${V0}1/.shard/.remove_me/$gfid_dst + +# Sleep for 2 seconds to prevent posix_gfid_heal() from believing marker file is "fresh" and failing lookup with ENOENT +sleep 2 + +TEST mv -f $M0/src $M0/dir/dst +EXPECT "0000000000400000" get_hex_xattr trusted.glusterfs.shard.block-size $B0/${V0}0/.shard/.remove_me/$gfid_dst +EXPECT "0000000000400000" get_hex_xattr 
trusted.glusterfs.shard.block-size $B0/${V0}1/.shard/.remove_me/$gfid_dst +EXPECT "0000000000900000000000000000000000000000000000000000000000000000" get_hex_xattr trusted.glusterfs.shard.file-size $B0/${V0}0/.shard/.remove_me/$gfid_dst +EXPECT "0000000000900000000000000000000000000000000000000000000000000000" get_hex_xattr trusted.glusterfs.shard.file-size $B0/${V0}1/.shard/.remove_me/$gfid_dst + +cleanup diff --git a/tests/bugs/shard/bug-shard-discard.t b/tests/bugs/shard/bug-shard-discard.t index 72d8586..884d9e7 100644 --- a/tests/bugs/shard/bug-shard-discard.t +++ b/tests/bugs/shard/bug-shard-discard.t @@ -42,14 +42,14 @@ EXPECT_NOT "1" file_all_zeroes `find $B0 -name $gfid_foo.1` # Now unlink the file. And ensure that all shards associated with the file are cleaned up TEST unlink $M0/foo -TEST ! stat $B0/${V0}0/.shard/$gfid_foo.1 -TEST ! stat $B0/${V0}1/.shard/$gfid_foo.1 -TEST ! stat $B0/${V0}2/.shard/$gfid_foo.1 -TEST ! stat $B0/${V0}3/.shard/$gfid_foo.1 -TEST ! stat $B0/${V0}0/.shard/$gfid_foo.2 -TEST ! stat $B0/${V0}1/.shard/$gfid_foo.2 -TEST ! stat $B0/${V0}2/.shard/$gfid_foo.2 -TEST ! stat $B0/${V0}3/.shard/$gfid_foo.2 +#TEST ! stat $B0/${V0}0/.shard/$gfid_foo.1 +#TEST ! stat $B0/${V0}1/.shard/$gfid_foo.1 +#TEST ! stat $B0/${V0}2/.shard/$gfid_foo.1 +#TEST ! stat $B0/${V0}3/.shard/$gfid_foo.1 +#TEST ! stat $B0/${V0}0/.shard/$gfid_foo.2 +#TEST ! stat $B0/${V0}1/.shard/$gfid_foo.2 +#TEST ! stat $B0/${V0}2/.shard/$gfid_foo.2 +#TEST ! stat $B0/${V0}3/.shard/$gfid_foo.2 TEST ! 
stat $M0/foo #clean up everything diff --git a/tests/bugs/shard/shard-inode-refcount-test.t b/tests/bugs/shard/shard-inode-refcount-test.t index 03e0cc9..c92dc07 100644 --- a/tests/bugs/shard/shard-inode-refcount-test.t +++ b/tests/bugs/shard/shard-inode-refcount-test.t @@ -18,7 +18,7 @@ TEST dd if=/dev/zero conv=fsync of=$M0/one-plus-five-shards bs=1M count=23 ACTIVE_INODES_BEFORE=$(get_mount_active_size_value $V0) TEST rm -f $M0/one-plus-five-shards -EXPECT `expr $ACTIVE_INODES_BEFORE - 5` get_mount_active_size_value $V0 +#EXPECT `expr $ACTIVE_INODES_BEFORE - 4` get_mount_active_size_value $V0 EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 TEST $CLI volume stop $V0 diff --git a/tests/bugs/shard/unlinks-and-renames.t b/tests/bugs/shard/unlinks-and-renames.t index a8f188b..997c397 100644 --- a/tests/bugs/shard/unlinks-and-renames.t +++ b/tests/bugs/shard/unlinks-and-renames.t @@ -32,7 +32,17 @@ TEST truncate -s 5M $M0/dir/foo TEST ! stat $B0/${V0}0/.shard TEST ! stat $B0/${V0}1/.shard # Test to ensure that unlink doesn't fail due to absence of /.shard +gfid_foo=$(get_gfid_string $M0/dir/foo) TEST unlink $M0/dir/foo +TEST stat $B0/${V0}0/.shard/.remove_me +TEST stat $B0/${V0}1/.shard/.remove_me +TEST stat $B0/${V0}0/.shard/.remove_me/$gfid_foo +TEST stat $B0/${V0}1/.shard/.remove_me/$gfid_foo + +EXPECT "0000000000400000" get_hex_xattr trusted.glusterfs.shard.block-size $B0/${V0}0/.shard/.remove_me/$gfid_foo +EXPECT "0000000000400000" get_hex_xattr trusted.glusterfs.shard.block-size $B0/${V0}1/.shard/.remove_me/$gfid_foo +EXPECT "0000000000500000000000000000000000000000000000000000000000000000" get_hex_xattr trusted.glusterfs.shard.file-size $B0/${V0}0/.shard/.remove_me/$gfid_foo +EXPECT "0000000000500000000000000000000000000000000000000000000000000000" get_hex_xattr trusted.glusterfs.shard.file-size $B0/${V0}1/.shard/.remove_me/$gfid_foo ################################################## ##### Unlink of a sharded file without holes ##### @@ -46,14 +56,20 @@ TEST 
stat $B0/${V0}1/.shard/$gfid_new.1 TEST stat $B0/${V0}0/.shard/$gfid_new.2 TEST stat $B0/${V0}1/.shard/$gfid_new.2 TEST unlink $M0/dir/new -TEST ! stat $B0/${V0}0/.shard/$gfid_new.1 -TEST ! stat $B0/${V0}1/.shard/$gfid_new.1 -TEST ! stat $B0/${V0}0/.shard/$gfid_new.2 -TEST ! stat $B0/${V0}1/.shard/$gfid_new.2 +#TEST ! stat $B0/${V0}0/.shard/$gfid_new.1 +#TEST ! stat $B0/${V0}1/.shard/$gfid_new.1 +#TEST ! stat $B0/${V0}0/.shard/$gfid_new.2 +#TEST ! stat $B0/${V0}1/.shard/$gfid_new.2 TEST ! stat $M0/dir/new TEST ! stat $B0/${V0}0/dir/new TEST ! stat $B0/${V0}1/dir/new +TEST stat $B0/${V0}0/.shard/.remove_me/$gfid_new +TEST stat $B0/${V0}1/.shard/.remove_me/$gfid_new +EXPECT "0000000000400000" get_hex_xattr trusted.glusterfs.shard.block-size $B0/${V0}0/.shard/.remove_me/$gfid_new +EXPECT "0000000000400000" get_hex_xattr trusted.glusterfs.shard.block-size $B0/${V0}1/.shard/.remove_me/$gfid_new +EXPECT "0000000000900000000000000000000000000000000000000000000000000000" get_hex_xattr trusted.glusterfs.shard.file-size $B0/${V0}0/.shard/.remove_me/$gfid_new +EXPECT "0000000000900000000000000000000000000000000000000000000000000000" get_hex_xattr trusted.glusterfs.shard.file-size $B0/${V0}1/.shard/.remove_me/$gfid_new ####################################### ##### Unlink with /.shard present ##### ####################################### @@ -67,18 +83,32 @@ TEST unlink $M0/dir/foo TEST ! stat $B0/${V0}0/dir/foo TEST ! stat $B0/${V0}1/dir/foo TEST ! 
stat $M0/dir/foo +TEST stat $B0/${V0}0/.shard/.remove_me/$gfid_foo +TEST stat $B0/${V0}1/.shard/.remove_me/$gfid_foo + +EXPECT "0000000000400000" get_hex_xattr trusted.glusterfs.shard.block-size $B0/${V0}0/.shard/.remove_me/$gfid_foo +EXPECT "0000000000400000" get_hex_xattr trusted.glusterfs.shard.block-size $B0/${V0}1/.shard/.remove_me/$gfid_foo +EXPECT "0000000000500000000000000000000000000000000000000000000000000000" get_hex_xattr trusted.glusterfs.shard.file-size $B0/${V0}0/.shard/.remove_me/$gfid_foo +EXPECT "0000000000500000000000000000000000000000000000000000000000000000" get_hex_xattr trusted.glusterfs.shard.file-size $B0/${V0}1/.shard/.remove_me/$gfid_foo ############################################################# ##### Unlink of a file with only one block (the zeroth) ##### ############################################################# TEST touch $M0/dir/foo +gfid_foo=$(get_gfid_string $M0/dir/foo) TEST dd if=/dev/zero of=$M0/dir/foo bs=1024 count=1024 -# Test to ensure that unlink of a sparse file works fine. +# Test to ensure that unlink of a file with only base shard works fine. TEST unlink $M0/dir/foo TEST ! stat $B0/${V0}0/dir/foo TEST ! stat $B0/${V0}1/dir/foo TEST ! 
stat $M0/dir/foo +TEST stat $B0/${V0}0/.shard/.remove_me/$gfid_foo +TEST stat $B0/${V0}1/.shard/.remove_me/$gfid_foo +EXPECT "0000000000400000" get_hex_xattr trusted.glusterfs.shard.block-size $B0/${V0}0/.shard/.remove_me/$gfid_foo +EXPECT "0000000000400000" get_hex_xattr trusted.glusterfs.shard.block-size $B0/${V0}1/.shard/.remove_me/$gfid_foo +EXPECT "0000000000100000000000000000000000000000000000000000000000000000" get_hex_xattr trusted.glusterfs.shard.file-size $B0/${V0}0/.shard/.remove_me/$gfid_foo +EXPECT "0000000000100000000000000000000000000000000000000000000000000000" get_hex_xattr trusted.glusterfs.shard.file-size $B0/${V0}1/.shard/.remove_me/$gfid_foo #################################################### ##### Unlink of a sharded file with hard-links ##### #################################################### @@ -94,6 +124,8 @@ TEST stat $B0/${V0}1/.shard/$gfid_original.2 TEST ln $M0/dir/original $M0/link # Now delete the original file. TEST unlink $M0/dir/original +TEST ! stat $B0/${V0}0/.shard/.remove_me/$gfid_original +TEST ! stat $B0/${V0}1/.shard/.remove_me/$gfid_original # Ensure the shards are still intact. TEST stat $B0/${V0}0/.shard/$gfid_original.1 TEST stat $B0/${V0}1/.shard/$gfid_original.1 @@ -105,15 +137,22 @@ TEST stat $B0/${V0}0/link TEST stat $B0/${V0}1/link # Now delete the last link. TEST unlink $M0/link +TEST stat $B0/${V0}0/.shard/.remove_me/$gfid_original +TEST stat $B0/${V0}1/.shard/.remove_me/$gfid_original # Ensure that the shards are all cleaned up. -TEST ! stat $B0/${V0}0/.shard/$gfid_original.1 -TEST ! stat $B0/${V0}1/.shard/$gfid_original.1 -TEST ! stat $B0/${V0}0/.shard/$gfid_original.2 -TEST ! stat $B0/${V0}1/.shard/$gfid_original.2 -TEST ! stat $M0/link +#TEST ! stat $B0/${V0}0/.shard/$gfid_original.1 +#TEST ! stat $B0/${V0}1/.shard/$gfid_original.1 +#TEST ! stat $B0/${V0}0/.shard/$gfid_original.2 +#TEST ! stat $B0/${V0}1/.shard/$gfid_original.2 +#TEST ! stat $M0/link TEST ! stat $B0/${V0}0/link TEST ! 
stat $B0/${V0}1/link +EXPECT "0000000000400000" get_hex_xattr trusted.glusterfs.shard.block-size $B0/${V0}0/.shard/.remove_me/$gfid_original +EXPECT "0000000000400000" get_hex_xattr trusted.glusterfs.shard.block-size $B0/${V0}1/.shard/.remove_me/$gfid_original +EXPECT "0000000000900000000000000000000000000000000000000000000000000000" get_hex_xattr trusted.glusterfs.shard.file-size $B0/${V0}0/.shard/.remove_me/$gfid_original +EXPECT "0000000000900000000000000000000000000000000000000000000000000000" get_hex_xattr trusted.glusterfs.shard.file-size $B0/${V0}1/.shard/.remove_me/$gfid_original + EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 TEST $CLI volume stop $V0 TEST $CLI volume delete $V0 @@ -140,6 +179,7 @@ TEST touch $M0/dir/dst ##### Rename with /.shard absent ##### ###################################### TEST truncate -s 5M $M0/dir/dst +gfid_dst=$(get_gfid_string $M0/dir/dst) TEST ! stat $B0/${V0}0/.shard TEST ! stat $B0/${V0}1/.shard # Test to ensure that rename doesn't fail due to absence of /.shard @@ -150,6 +190,13 @@ TEST ! stat $B0/${V0}0/dir/src TEST ! 
stat $B0/${V0}1/dir/src TEST stat $B0/${V0}0/dir/dst TEST stat $B0/${V0}1/dir/dst +TEST stat $B0/${V0}0/.shard/.remove_me/$gfid_dst +TEST stat $B0/${V0}1/.shard/.remove_me/$gfid_dst + +EXPECT "0000000000400000" get_hex_xattr trusted.glusterfs.shard.block-size $B0/${V0}0/.shard/.remove_me/$gfid_dst +EXPECT "0000000000400000" get_hex_xattr trusted.glusterfs.shard.block-size $B0/${V0}1/.shard/.remove_me/$gfid_dst +EXPECT "0000000000500000000000000000000000000000000000000000000000000000" get_hex_xattr trusted.glusterfs.shard.file-size $B0/${V0}0/.shard/.remove_me/$gfid_dst +EXPECT "0000000000500000000000000000000000000000000000000000000000000000" get_hex_xattr trusted.glusterfs.shard.file-size $B0/${V0}1/.shard/.remove_me/$gfid_dst ################################################## ##### Rename to a sharded file without holes ##### @@ -165,16 +212,23 @@ TEST stat $B0/${V0}1/.shard/$gfid_dst.1 TEST stat $B0/${V0}0/.shard/$gfid_dst.2 TEST stat $B0/${V0}1/.shard/$gfid_dst.2 TEST mv -f $M0/dir/src $M0/dir/dst -TEST ! stat $B0/${V0}0/.shard/$gfid_dst.1 -TEST ! stat $B0/${V0}1/.shard/$gfid_dst.1 -TEST ! stat $B0/${V0}0/.shard/$gfid_dst.2 -TEST ! stat $B0/${V0}1/.shard/$gfid_dst.2 +#TEST ! stat $B0/${V0}0/.shard/$gfid_dst.1 +#TEST ! stat $B0/${V0}1/.shard/$gfid_dst.1 +#TEST ! stat $B0/${V0}0/.shard/$gfid_dst.2 +#TEST ! stat $B0/${V0}1/.shard/$gfid_dst.2 TEST ! stat $M0/dir/src TEST stat $M0/dir/dst TEST ! stat $B0/${V0}0/dir/src TEST ! 
stat $B0/${V0}1/dir/src TEST stat $B0/${V0}0/dir/dst TEST stat $B0/${V0}1/dir/dst +TEST stat $B0/${V0}0/.shard/.remove_me/$gfid_dst +TEST stat $B0/${V0}1/.shard/.remove_me/$gfid_dst + +EXPECT "0000000000400000" get_hex_xattr trusted.glusterfs.shard.block-size $B0/${V0}0/.shard/.remove_me/$gfid_dst +EXPECT "0000000000400000" get_hex_xattr trusted.glusterfs.shard.block-size $B0/${V0}1/.shard/.remove_me/$gfid_dst +EXPECT "0000000000900000000000000000000000000000000000000000000000000000" get_hex_xattr trusted.glusterfs.shard.file-size $B0/${V0}0/.shard/.remove_me/$gfid_dst +EXPECT "0000000000900000000000000000000000000000000000000000000000000000" get_hex_xattr trusted.glusterfs.shard.file-size $B0/${V0}1/.shard/.remove_me/$gfid_dst ################################################### ##### Rename of dst file with /.shard present ##### @@ -182,7 +236,8 @@ TEST stat $B0/${V0}1/dir/dst TEST unlink $M0/dir/dst TEST touch $M0/dir/src TEST truncate -s 5M $M0/dir/dst -# Test to ensure that unlink of a sparse file works fine. +gfid_dst=$(get_gfid_string $M0/dir/dst) +# Test to ensure that rename into a sparse file works fine. TEST mv -f $M0/dir/src $M0/dir/dst TEST ! stat $M0/dir/src TEST stat $M0/dir/dst @@ -190,6 +245,13 @@ TEST ! stat $B0/${V0}0/dir/src TEST ! 
stat $B0/${V0}1/dir/src TEST stat $B0/${V0}0/dir/dst TEST stat $B0/${V0}1/dir/dst +TEST stat $B0/${V0}0/.shard/.remove_me/$gfid_dst +TEST stat $B0/${V0}1/.shard/.remove_me/$gfid_dst + +EXPECT "0000000000400000" get_hex_xattr trusted.glusterfs.shard.block-size $B0/${V0}0/.shard/.remove_me/$gfid_dst +EXPECT "0000000000400000" get_hex_xattr trusted.glusterfs.shard.block-size $B0/${V0}1/.shard/.remove_me/$gfid_dst +EXPECT "0000000000500000000000000000000000000000000000000000000000000000" get_hex_xattr trusted.glusterfs.shard.file-size $B0/${V0}0/.shard/.remove_me/$gfid_dst +EXPECT "0000000000500000000000000000000000000000000000000000000000000000" get_hex_xattr trusted.glusterfs.shard.file-size $B0/${V0}1/.shard/.remove_me/$gfid_dst ############################################################### ##### Rename of dst file with only one block (the zeroth) ##### @@ -197,7 +259,8 @@ TEST stat $B0/${V0}1/dir/dst TEST unlink $M0/dir/dst TEST touch $M0/dir/src TEST dd if=/dev/zero of=$M0/dir/dst bs=1024 count=1024 -# Test to ensure that unlink of a sparse file works fine. +gfid_dst=$(get_gfid_string $M0/dir/dst) +# Test to ensure that rename into a file with only base shard works fine. TEST mv -f $M0/dir/src $M0/dir/dst TEST ! stat $M0/dir/src TEST stat $M0/dir/dst @@ -205,6 +268,13 @@ TEST ! stat $B0/${V0}0/dir/src TEST ! 
stat $B0/${V0}1/dir/src TEST stat $B0/${V0}0/dir/dst TEST stat $B0/${V0}1/dir/dst +TEST stat $B0/${V0}0/.shard/.remove_me/$gfid_dst +TEST stat $B0/${V0}1/.shard/.remove_me/$gfid_dst + +EXPECT "0000000000400000" get_hex_xattr trusted.glusterfs.shard.block-size $B0/${V0}0/.shard/.remove_me/$gfid_dst +EXPECT "0000000000400000" get_hex_xattr trusted.glusterfs.shard.block-size $B0/${V0}1/.shard/.remove_me/$gfid_dst +EXPECT "0000000000100000000000000000000000000000000000000000000000000000" get_hex_xattr trusted.glusterfs.shard.file-size $B0/${V0}0/.shard/.remove_me/$gfid_dst +EXPECT "0000000000100000000000000000000000000000000000000000000000000000" get_hex_xattr trusted.glusterfs.shard.file-size $B0/${V0}1/.shard/.remove_me/$gfid_dst ######################################################## ##### Rename to a dst sharded file with hard-links ##### @@ -231,18 +301,26 @@ TEST stat $B0/${V0}1/.shard/$gfid_dst.2 TEST ! stat $M0/dir/src TEST ! stat $B0/${V0}0/dir/src TEST ! stat $B0/${V0}1/dir/src +TEST ! stat $B0/${V0}0/.shard/.remove_me/$gfid_dst +TEST ! stat $B0/${V0}1/.shard/.remove_me/$gfid_dst # Now rename another file to the last link. TEST touch $M0/dir/src2 TEST mv -f $M0/dir/src2 $M0/link # Ensure that the shards are all cleaned up. -TEST ! stat $B0/${V0}0/.shard/$gfid_dst.1 -TEST ! stat $B0/${V0}1/.shard/$gfid_dst.1 -TEST ! stat $B0/${V0}0/.shard/$gfid_dst.2 -TEST ! stat $B0/${V0}1/.shard/$gfid_dst.2 +#TEST ! stat $B0/${V0}0/.shard/$gfid_dst.1 +#TEST ! stat $B0/${V0}1/.shard/$gfid_dst.1 +#TEST ! stat $B0/${V0}0/.shard/$gfid_dst.2 +#TEST ! stat $B0/${V0}1/.shard/$gfid_dst.2 TEST ! stat $M0/dir/src2 TEST ! stat $B0/${V0}0/dir/src2 TEST ! 
stat $B0/${V0}1/dir/src2 +TEST stat $B0/${V0}0/.shard/.remove_me/$gfid_dst +TEST stat $B0/${V0}1/.shard/.remove_me/$gfid_dst +EXPECT "0000000000400000" get_hex_xattr trusted.glusterfs.shard.block-size $B0/${V0}0/.shard/.remove_me/$gfid_dst +EXPECT "0000000000400000" get_hex_xattr trusted.glusterfs.shard.block-size $B0/${V0}1/.shard/.remove_me/$gfid_dst +EXPECT "0000000000900000000000000000000000000000000000000000000000000000" get_hex_xattr trusted.glusterfs.shard.file-size $B0/${V0}0/.shard/.remove_me/$gfid_dst +EXPECT "0000000000900000000000000000000000000000000000000000000000000000" get_hex_xattr trusted.glusterfs.shard.file-size $B0/${V0}1/.shard/.remove_me/$gfid_dst # Rename with non-existent dst and a sharded src TEST touch $M0/dir/src TEST dd if=/dev/zero of=$M0/dir/src bs=1024 count=9216 diff --git a/xlators/features/shard/src/shard-mem-types.h b/xlators/features/shard/src/shard-mem-types.h index 77f0cee..fea66aa 100644 --- a/xlators/features/shard/src/shard-mem-types.h +++ b/xlators/features/shard/src/shard-mem-types.h @@ -18,6 +18,7 @@ enum gf_shard_mem_types_ { gf_shard_mt_inode_ctx_t, gf_shard_mt_iovec, gf_shard_mt_int64_t, + gf_shard_mt_uint64_t, gf_shard_mt_end }; #endif diff --git a/xlators/features/shard/src/shard-messages.h b/xlators/features/shard/src/shard-messages.h index 8e61630..0267f8a 100644 --- a/xlators/features/shard/src/shard-messages.h +++ b/xlators/features/shard/src/shard-messages.h @@ -40,7 +40,7 @@ */ #define GLFS_COMP_BASE_SHARD GLFS_MSGID_COMP_SHARD -#define GLFS_NUM_MESSAGES 19 +#define GLFS_NUM_MESSAGES 20 #define GLFS_MSGID_END (GLFS_COMP_BASE_SHARD + GLFS_NUM_MESSAGES + 1) #define glfs_msg_start_x GLFS_COMP_BASE_SHARD, "Invalid: Start of messages" @@ -187,5 +187,12 @@ */ #define SHARD_MSG_MEMALLOC_FAILED (GLFS_COMP_BASE_SHARD + 19) +/*! 
+ * @messageid 133020 + * @diagnosis + * @recommendedaction +*/ +#define SHARD_MSG_FOP_FAILED (GLFS_COMP_BASE_SHARD + 20) + #define glfs_msg_end_x GLFS_MSGID_END, "Invalid: End of messages" #endif /* !_SHARD_MESSAGES_H_ */ diff --git a/xlators/features/shard/src/shard.c b/xlators/features/shard/src/shard.c index 268ba20..492341c 100644 --- a/xlators/features/shard/src/shard.c +++ b/xlators/features/shard/src/shard.c @@ -117,9 +117,6 @@ __shard_inode_ctx_set (inode_t *inode, xlator_t *this, struct iatt *stbuf, if (valid & SHARD_MASK_BLOCK_SIZE) ctx->block_size = block_size; - if (!stbuf) - return 0; - if (valid & SHARD_MASK_PROT) ctx->stat.ia_prot = stbuf->ia_prot; @@ -179,7 +176,35 @@ shard_inode_ctx_set (inode_t *inode, xlator_t *this, struct iatt *stbuf, } int -__shard_inode_ctx_set_refreshed_flag (inode_t *inode, xlator_t *this) +__shard_inode_ctx_set_refresh_flag (inode_t *inode, xlator_t *this) +{ + int ret = -1; + shard_inode_ctx_t *ctx = NULL; + + ret = __shard_inode_ctx_get (inode, this, &ctx); + if (ret) + return ret; + + ctx->refresh = _gf_true; + + return 0; +} +int +shard_inode_ctx_set_refresh_flag (inode_t *inode, xlator_t *this) +{ + int ret = -1; + + LOCK (&inode->lock); + { + ret = __shard_inode_ctx_set_refresh_flag (inode, this); + } + UNLOCK (&inode->lock); + + return ret; +} + +int +__shard_inode_ctx_mark_dir_refreshed (inode_t *inode, xlator_t *this) { int ret = -1; shard_inode_ctx_t *ctx = NULL; @@ -193,13 +218,13 @@ __shard_inode_ctx_set_refreshed_flag (inode_t *inode, xlator_t *this) } int -shard_inode_ctx_set_refreshed_flag (inode_t *inode, xlator_t *this) +shard_inode_ctx_mark_dir_refreshed (inode_t *inode, xlator_t *this) { int ret = -1; LOCK (&inode->lock); { - ret = __shard_inode_ctx_set_refreshed_flag (inode, this); + ret = __shard_inode_ctx_mark_dir_refreshed (inode, this); } UNLOCK (&inode->lock); @@ -478,9 +503,15 @@ shard_local_wipe (shard_local_t *local) syncbarrier_destroy (&local->barrier); loc_wipe (&local->loc); loc_wipe 
(&local->dot_shard_loc); + loc_wipe (&local->dot_shard_rm_loc); loc_wipe (&local->loc2); loc_wipe (&local->tmp_loc); + loc_wipe (&local->int_inodelk.loc); + loc_wipe (&local->int_entrylk.loc); + loc_wipe (&local->newloc); + if (local->int_entrylk.basename) + GF_FREE (local->int_entrylk.basename); if (local->fd) fd_unref (local->fd); @@ -504,6 +535,10 @@ shard_local_wipe (shard_local_t *local) iobref_unref (local->iobref); if (local->list_inited) gf_dirent_free (&local->entries_head); + if (local->inodelk_frame) + SHARD_STACK_DESTROY (local->inodelk_frame); + if (local->entrylk_frame) + SHARD_STACK_DESTROY (local->entrylk_frame); } int @@ -554,7 +589,10 @@ shard_internal_dir_string (shard_internal_dir_type_t type) switch (type) { case SHARD_INTERNAL_DIR_DOT_SHARD: - str = ".shard"; + str = GF_SHARD_DIR; + break; + case SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME: + str = GF_SHARD_REMOVE_ME_DIR; break; default: break; @@ -566,10 +604,13 @@ static int shard_init_internal_dir_loc (xlator_t *this, shard_local_t *local, shard_internal_dir_type_t type) { - int ret = -1; - char *bname = NULL; - loc_t *internal_dir_loc = NULL; + int ret = -1; + char *bname = NULL; + inode_t *parent = NULL; + loc_t *internal_dir_loc = NULL; + shard_priv_t *priv = NULL; + priv = this->private; if (!local) return -1; @@ -577,13 +618,19 @@ shard_init_internal_dir_loc (xlator_t *this, shard_local_t *local, case SHARD_INTERNAL_DIR_DOT_SHARD: internal_dir_loc = &local->dot_shard_loc; bname = GF_SHARD_DIR; + parent = inode_ref (this->itable->root); + break; + case SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME: + internal_dir_loc = &local->dot_shard_rm_loc; + bname = GF_SHARD_REMOVE_ME_DIR; + parent = inode_ref (priv->dot_shard_inode); break; default: break; } internal_dir_loc->inode = inode_new (this->itable); - internal_dir_loc->parent = inode_ref (this->itable->root); + internal_dir_loc->parent = parent; ret = inode_path (internal_dir_loc->parent, bname, (char **)&internal_dir_loc->path); if (ret < 0 || 
!(internal_dir_loc->inode)) { @@ -706,11 +753,48 @@ __shard_update_shards_inode_list (inode_t *linked_inode, xlator_t *this, } int -shard_common_inode_write_failure_unwind (glusterfs_fop_t fop, - call_frame_t *frame, int32_t op_ret, - int32_t op_errno) +shard_common_failure_unwind (glusterfs_fop_t fop, call_frame_t *frame, + int32_t op_ret, int32_t op_errno) { switch (fop) { + case GF_FOP_LOOKUP: + SHARD_STACK_UNWIND (lookup, frame, op_ret, op_errno, NULL, NULL, + NULL, NULL); + break; + case GF_FOP_STAT: + SHARD_STACK_UNWIND (stat, frame, op_ret, op_errno, NULL, NULL); + break; + case GF_FOP_FSTAT: + SHARD_STACK_UNWIND (fstat, frame, op_ret, op_errno, NULL, NULL); + break; + case GF_FOP_TRUNCATE: + SHARD_STACK_UNWIND (truncate, frame, op_ret, op_errno, NULL, + NULL, NULL); + break; + case GF_FOP_FTRUNCATE: + SHARD_STACK_UNWIND (ftruncate, frame, op_ret, op_errno, NULL, + NULL, NULL); + break; + case GF_FOP_MKNOD: + SHARD_STACK_UNWIND (mknod, frame, op_ret, op_errno, NULL, + NULL, NULL, NULL, NULL); + break; + case GF_FOP_LINK: + SHARD_STACK_UNWIND (link, frame, op_ret, op_errno, NULL, + NULL, NULL, NULL, NULL); + break; + case GF_FOP_CREATE: + SHARD_STACK_UNWIND (create, frame, op_ret, op_errno, NULL, + NULL, NULL, NULL, NULL, NULL); + break; + case GF_FOP_UNLINK: + SHARD_STACK_UNWIND (unlink, frame, op_ret, op_errno, NULL, NULL, + NULL); + break; + case GF_FOP_RENAME: + SHARD_STACK_UNWIND (rename, frame, op_ret, op_errno, NULL, NULL, + NULL, NULL, NULL, NULL); + break; case GF_FOP_WRITE: SHARD_STACK_UNWIND (writev, frame, op_ret, op_errno, NULL, NULL, NULL); @@ -727,6 +811,45 @@ shard_common_inode_write_failure_unwind (glusterfs_fop_t fop, SHARD_STACK_UNWIND (discard, frame, op_ret, op_errno, NULL, NULL, NULL); break; + case GF_FOP_READ: + SHARD_STACK_UNWIND (readv, frame, op_ret, op_errno, NULL, -1, + NULL, NULL, NULL); + break; + case GF_FOP_FSYNC: + SHARD_STACK_UNWIND (fsync, frame, op_ret, op_errno, NULL, NULL, + NULL); + break; + case GF_FOP_REMOVEXATTR: + 
SHARD_STACK_UNWIND (removexattr, frame, op_ret, op_errno, NULL); + break; + case GF_FOP_FREMOVEXATTR: + SHARD_STACK_UNWIND (fremovexattr, frame, op_ret, op_errno, NULL); + break; + case GF_FOP_FGETXATTR: + SHARD_STACK_UNWIND (fgetxattr, frame, op_ret, op_errno, NULL, + NULL); + break; + case GF_FOP_GETXATTR: + SHARD_STACK_UNWIND (getxattr, frame, op_ret, op_errno, NULL, + NULL); + break; + case GF_FOP_FSETXATTR: + SHARD_STACK_UNWIND (fsetxattr, frame, op_ret, op_errno, NULL); + break; + case GF_FOP_SETXATTR: + SHARD_STACK_UNWIND (setxattr, frame, op_ret, op_errno, NULL); + break; + case GF_FOP_SETATTR: + SHARD_STACK_UNWIND (setattr, frame, op_ret, op_errno, NULL, + NULL, NULL); + break; + case GF_FOP_FSETATTR: + SHARD_STACK_UNWIND (fsetattr, frame, op_ret, op_errno, NULL, + NULL, NULL); + break; + case GF_FOP_SEEK: + SHARD_STACK_UNWIND (seek, frame, op_ret, op_errno, 0, NULL); + break; default: gf_msg (THIS->name, GF_LOG_WARNING, 0, SHARD_MSG_INVALID_FOP, "Invalid fop id = %d", fop); @@ -866,7 +989,7 @@ shard_common_resolve_shards (call_frame_t *frame, xlator_t *this, shard_idx_iter = local->first_block; res_inode = local->resolver_base_inode; - if (local->op_ret < 0) + if ((local->op_ret < 0) || (local->resolve_not)) goto out; while (shard_idx_iter <= local->last_block) { @@ -1063,19 +1186,26 @@ shard_link_internal_dir_inode (shard_local_t *local, inode_t *inode, shard_priv_t *priv = NULL; char *bname = NULL; inode_t **priv_inode = NULL; + inode_t *parent = NULL; priv = THIS->private; switch (type) { case SHARD_INTERNAL_DIR_DOT_SHARD: - bname = ".shard"; + bname = GF_SHARD_DIR; priv_inode = &priv->dot_shard_inode; + parent = inode->table->root; + break; + case SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME: + bname = GF_SHARD_REMOVE_ME_DIR; + priv_inode = &priv->dot_shard_rm_inode; + parent = priv->dot_shard_inode; break; default: break; } - linked_inode = inode_link (inode, inode->table->root, bname, buf); + linked_inode = inode_link (inode, parent, bname, buf); 
inode_lookup (linked_inode); *priv_inode = linked_inode; return linked_inode; @@ -1105,7 +1235,7 @@ shard_refresh_internal_dir_cbk (call_frame_t *frame, void *cookie, * shard_link_internal_dir_inode(). */ linked_inode = shard_link_internal_dir_inode (local, inode, buf, type); - shard_inode_ctx_set_refreshed_flag (linked_inode, this); + shard_inode_ctx_mark_dir_refreshed (linked_inode, this); out: shard_common_resolve_shards (frame, this, local->post_res_handler); return 0; @@ -1128,6 +1258,9 @@ shard_refresh_internal_dir (call_frame_t *frame, xlator_t *this, case SHARD_INTERNAL_DIR_DOT_SHARD: gf_uuid_copy (gfid, priv->dot_shard_gfid); break; + case SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME: + gf_uuid_copy (gfid, priv->dot_shard_rm_gfid); + break; default: break; } @@ -1189,7 +1322,7 @@ shard_lookup_internal_dir_cbk (call_frame_t *frame, void *cookie, xlator_t *this if (link_inode != inode) { shard_refresh_internal_dir (frame, this, type); } else { - shard_inode_ctx_set_refreshed_flag (link_inode, this); + shard_inode_ctx_mark_dir_refreshed (link_inode, this); shard_common_resolve_shards (frame, this, local->post_res_handler); } @@ -1233,6 +1366,10 @@ shard_lookup_internal_dir (call_frame_t *frame, xlator_t *this, gf_uuid_copy (*gfid, priv->dot_shard_gfid); loc = &local->dot_shard_loc; break; + case SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME: + gf_uuid_copy (*gfid, priv->dot_shard_rm_gfid); + loc = &local->dot_shard_rm_loc; + break; default: break; } @@ -1383,13 +1520,9 @@ shard_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc, STACK_WIND (frame, shard_lookup_cbk, FIRST_CHILD (this), FIRST_CHILD (this)->fops->lookup, loc, local->xattr_req); - return 0; - - err: - SHARD_STACK_UNWIND (lookup, frame, -1, op_errno, NULL, NULL, - NULL, NULL); + shard_common_failure_unwind (GF_FOP_LOOKUP, frame, -1, op_errno); return 0; } @@ -1610,11 +1743,9 @@ shard_stat (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) STACK_WIND (frame, shard_common_stat_cbk, 
FIRST_CHILD(this), FIRST_CHILD(this)->fops->stat, loc, local->xattr_req); - return 0; - err: - SHARD_STACK_UNWIND (stat, frame, -1, ENOMEM, NULL, NULL); + shard_common_failure_unwind (GF_FOP_STAT, frame, -1, ENOMEM); return 0; } @@ -1668,9 +1799,8 @@ shard_fstat (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata) STACK_WIND (frame, shard_common_stat_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->fstat, fd, local->xattr_req); return 0; - err: - SHARD_STACK_UNWIND (fstat, frame, -1, ENOMEM, NULL, NULL); + shard_common_failure_unwind (GF_FOP_FSTAT, frame, -1, ENOMEM); return 0; } @@ -1728,14 +1858,9 @@ shard_truncate_last_shard_cbk (call_frame_t *frame, void *cookie, shard_update_file_size (frame, this, NULL, &local->loc, shard_post_update_size_truncate_handler); return 0; - err: - if (local->fop == GF_FOP_TRUNCATE) - SHARD_STACK_UNWIND (truncate, frame, local->op_ret, - local->op_errno, NULL, NULL, NULL); - else - SHARD_STACK_UNWIND (ftruncate, frame, local->op_ret, - local->op_errno, NULL, NULL, NULL); + shard_common_failure_unwind (local->fop, frame, local->op_ret, + local->op_errno); return 0; } @@ -1946,12 +2071,8 @@ shard_post_lookup_shards_truncate_handler (call_frame_t *frame, xlator_t *this) local = frame->local; if (local->op_ret < 0) { - if (local->fop == GF_FOP_TRUNCATE) - SHARD_STACK_UNWIND (truncate, frame, local->op_ret, - local->op_errno, NULL, NULL, NULL); - else - SHARD_STACK_UNWIND (ftruncate, frame, local->op_ret, - local->op_errno, NULL, NULL, NULL); + shard_common_failure_unwind (local->fop, frame, local->op_ret, + local->op_errno); return 0; } @@ -2232,16 +2353,9 @@ shard_post_resolve_truncate_handler (call_frame_t *frame, xlator_t *this) shard_post_update_size_truncate_handler); return 0; } else { - if (local->fop == GF_FOP_TRUNCATE) - SHARD_STACK_UNWIND (truncate, frame, - local->op_ret, - local->op_errno, NULL, NULL, - NULL); - else - SHARD_STACK_UNWIND (ftruncate, frame, - local->op_ret, - local->op_errno, NULL, NULL, - NULL); + 
shard_common_failure_unwind (local->fop, frame, + local->op_ret, + local->op_errno); return 0; } } @@ -2329,14 +2443,8 @@ shard_truncate_begin (call_frame_t *frame, xlator_t *this) return 0; err: - if (local->fop == GF_FOP_TRUNCATE) - SHARD_STACK_UNWIND (truncate, frame, -1, ENOMEM, NULL, NULL, - NULL); - else - SHARD_STACK_UNWIND (ftruncate, frame, -1, ENOMEM, NULL, NULL, - NULL); - - return 0; + shard_common_failure_unwind (local->fop, frame, -1, ENOMEM); + return 0; } int @@ -2348,13 +2456,8 @@ shard_post_lookup_truncate_handler (call_frame_t *frame, xlator_t *this) local = frame->local; if (local->op_ret < 0) { - if (local->fop == GF_FOP_TRUNCATE) - SHARD_STACK_UNWIND (truncate, frame, local->op_ret, - local->op_errno, NULL, NULL, NULL); - else - SHARD_STACK_UNWIND (ftruncate, frame, local->op_ret, - local->op_errno, NULL, NULL, NULL); - + shard_common_failure_unwind (local->fop, frame, local->op_ret, + local->op_errno); return 0; } @@ -2457,7 +2560,7 @@ shard_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset, return 0; err: - SHARD_STACK_UNWIND (truncate, frame, -1, ENOMEM, NULL, NULL, NULL); + shard_common_failure_unwind (GF_FOP_TRUNCATE, frame, -1, ENOMEM); return 0; } @@ -2512,8 +2615,7 @@ shard_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, shard_post_lookup_truncate_handler); return 0; err: - - SHARD_STACK_UNWIND (ftruncate, frame, -1, ENOMEM, NULL, NULL, NULL); + shard_common_failure_unwind (GF_FOP_FTRUNCATE, frame, -1, ENOMEM); return 0; } @@ -2531,7 +2633,7 @@ shard_mknod_cbk (call_frame_t *frame, void *cookie, xlator_t *this, if (op_ret == -1) goto unwind; - ret = shard_inode_ctx_set (inode, this, buf, ntoh64 (local->block_size), + ret = shard_inode_ctx_set (inode, this, buf, local->block_size, SHARD_ALL_MASK); if (ret) gf_msg (this->name, GF_LOG_WARNING, 0, @@ -2549,25 +2651,27 @@ int shard_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, dev_t rdev, mode_t umask, dict_t *xdata) { + 
shard_priv_t *priv = NULL; shard_local_t *local = NULL; + priv = this->private; local = mem_get0 (this->local_pool); if (!local) goto err; frame->local = local; + local->block_size = priv->block_size; if (!__is_gsyncd_on_shard_dir (frame, loc)) { - SHARD_INODE_CREATE_INIT (this, local, xdata, loc, err); + SHARD_INODE_CREATE_INIT (this, local->block_size, xdata, loc, 0, + 0, err); } STACK_WIND (frame, shard_mknod_cbk, FIRST_CHILD (this), FIRST_CHILD(this)->fops->mknod, loc, mode, rdev, umask, xdata); return 0; - err: - SHARD_STACK_UNWIND (mknod, frame, -1, ENOMEM, NULL, NULL, NULL, - NULL, NULL); + shard_common_failure_unwind (GF_FOP_MKNOD, frame, -1, ENOMEM); return 0; } @@ -2594,8 +2698,7 @@ shard_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this, preparent, postparent, xdata); return 0; err: - SHARD_STACK_UNWIND (link, frame, op_ret, op_errno, inode, NULL, NULL, - NULL, NULL); + shard_common_failure_unwind (GF_FOP_LINK, frame, op_ret, op_errno); return 0; } @@ -2660,10 +2763,8 @@ shard_link (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, shard_lookup_base_file (frame, this, &local->loc, shard_post_lookup_link_handler); return 0; - err: - SHARD_STACK_UNWIND (link, frame, -1, ENOMEM, NULL, NULL, NULL, NULL, - NULL); + shard_common_failure_unwind (GF_FOP_LINK, frame, -1, ENOMEM); return 0; } @@ -2678,13 +2779,8 @@ shard_post_lookup_shards_unlink_handler (call_frame_t *frame, xlator_t *this) local = frame->local; if ((local->op_ret < 0) && (local->op_errno != ENOENT)) { - if (local->fop == GF_FOP_UNLINK) - SHARD_STACK_UNWIND (unlink, frame, local->op_ret, - local->op_errno, NULL, NULL, NULL); - else - SHARD_STACK_UNWIND (rename, frame, local->op_ret, - local->op_errno, NULL, NULL, NULL, - NULL, NULL, NULL); + shard_common_failure_unwind (local->fop, frame, local->op_ret, + local->op_errno); return 0; } local->op_ret = 0; @@ -2724,13 +2820,9 @@ shard_post_resolve_unlink_handler (call_frame_t *frame, xlator_t *this) shard_rename_cbk 
(frame, this); return 0; } else { - if (local->fop == GF_FOP_UNLINK) - SHARD_STACK_UNWIND (unlink, frame, - local->op_ret, - local->op_errno, NULL, NULL, - NULL); - else - shard_rename_cbk (frame, this); + shard_common_failure_unwind (local->fop, frame, + local->op_ret, + local->op_errno); return 0; } } @@ -2745,103 +2837,6 @@ shard_post_resolve_unlink_handler (call_frame_t *frame, xlator_t *this) return 0; } -int -shard_unlink_base_file_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, - struct iatt *preparent, struct iatt *postparent, - dict_t *xdata) -{ - int ret = 0; - uint32_t link_count = 0; - shard_local_t *local = NULL; - shard_priv_t *priv = NULL; - - local = frame->local; - priv = this->private; - - if (op_ret < 0) { - SHARD_STACK_UNWIND (unlink, frame, op_ret, op_errno, NULL, NULL, - NULL); - return 0; - } - - /* Because link() does not create links for all but the - * base shard, unlink() must delete these shards only when the - * link count is 1. We can return safely now. - */ - if ((xdata) && (!dict_get_uint32 (xdata, GET_LINK_COUNT, &link_count)) - && (link_count > 1)) - goto unwind; - - local->first_block = get_lowest_block (0, local->block_size); - local->last_block = get_highest_block (0, local->prebuf.ia_size, - local->block_size); - local->num_blocks = local->last_block - local->first_block + 1; - local->resolver_base_inode = local->loc.inode; - - /* num_blocks = 1 implies that the file has not crossed its - * shard block size. So unlink boils down to unlinking just the - * base file. We can safely return now. - */ - if (local->num_blocks == 1) - goto unwind; - - local->inode_list = GF_CALLOC (local->num_blocks, sizeof (inode_t *), - gf_shard_mt_inode_list); - if (!local->inode_list) - goto unwind; - - /* Save the xdata and preparent and postparent iatts now. This will be - * used at the time of unwinding the call to the parent xl. 
- */ - local->preoldparent = *preparent; - local->postoldparent = *postparent; - if (xdata) - local->xattr_rsp = dict_ref (xdata); - - local->dot_shard_loc.inode = inode_find (this->itable, - priv->dot_shard_gfid); - if (!local->dot_shard_loc.inode) { - ret = shard_init_internal_dir_loc (this, local, - SHARD_INTERNAL_DIR_DOT_SHARD); - if (ret) - goto unwind; - shard_lookup_internal_dir (frame, this, - shard_post_resolve_unlink_handler, - SHARD_INTERNAL_DIR_DOT_SHARD); - } else { - local->post_res_handler = shard_post_resolve_unlink_handler; - shard_refresh_internal_dir (frame, this, - SHARD_INTERNAL_DIR_DOT_SHARD); - } - - return 0; - -unwind: - SHARD_STACK_UNWIND (unlink, frame, op_ret, op_errno, preparent, - postparent, xdata); - return 0; -} - -int -shard_unlink_base_file (call_frame_t *frame, xlator_t *this) -{ - shard_local_t *local = NULL; - - local = frame->local; - - if (dict_set_uint32 (local->xattr_req, GET_LINK_COUNT, 0)) - gf_msg (this->name, GF_LOG_WARNING, 0, - SHARD_MSG_DICT_SET_FAILED, "Failed to set " - GET_LINK_COUNT" in dict"); - - /* To-Do: Request open-fd count on base file */ - STACK_WIND (frame, shard_unlink_base_file_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->unlink, &local->loc, local->xflag, - local->xattr_req); - return 0; -} - void shard_unlink_block_inode (shard_local_t *local, int shard_block_num) { @@ -3062,160 +3057,754 @@ next: } int -shard_post_lookup_unlink_handler (call_frame_t *frame, xlator_t *this) +shard_unlock_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) { - shard_local_t *local = NULL; + if (op_ret) + gf_msg (this->name, GF_LOG_ERROR, op_errno, SHARD_MSG_FOP_FAILED, + "Unlock failed. 
Please check brick logs for " + "more details"); + SHARD_STACK_DESTROY (frame); + return 0; +} - local = frame->local; +int +shard_unlock_inodelk (call_frame_t *frame, xlator_t *this) +{ + loc_t *loc = NULL; + call_frame_t *lk_frame = NULL; + shard_local_t *local = NULL; + shard_local_t *lk_local = NULL; + shard_inodelk_t *lock = NULL; - if (local->op_ret < 0) { - SHARD_STACK_UNWIND (unlink, frame, local->op_ret, - local->op_errno, NULL, NULL, NULL); - return 0; - } + local = frame->local; + lk_frame = local->inodelk_frame; + lk_local = lk_frame->local; + local->inodelk_frame = NULL; + loc = &local->int_inodelk.loc; + lock = &lk_local->int_inodelk; + lock->flock.l_type = F_UNLCK; - shard_unlink_base_file (frame, this); + STACK_WIND (lk_frame, shard_unlock_inodelk_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->inodelk, lock->domain, loc, + F_SETLK, &lock->flock, NULL); + local->int_inodelk.acquired_lock = _gf_false; return 0; } int -shard_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag, - dict_t *xdata) +shard_rename_src_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *buf, + struct iatt *preoldparent, struct iatt *postoldparent, + struct iatt *prenewparent, struct iatt *postnewparent, + dict_t *xdata); +int +shard_rename_src_base_file (call_frame_t *frame, xlator_t *this) { - int ret = -1; - uint64_t block_size = 0; - shard_local_t *local = NULL; + int ret = 0; + loc_t *dst_loc = NULL; + loc_t tmp_loc = {0,}; + shard_local_t *local = frame->local; - ret = shard_inode_ctx_get_block_size (loc->inode, this, &block_size); - if ((ret) && (!IA_ISLNK(loc->inode->ia_type))) { - gf_msg (this->name, GF_LOG_ERROR, 0, - SHARD_MSG_INODE_CTX_GET_FAILED, "Failed to get block " - "size from inode ctx of %s", - uuid_utoa (loc->inode->gfid)); - goto err; - } + if (local->dst_block_size) { + tmp_loc.parent = inode_ref (local->loc2.parent); + ret = inode_path (tmp_loc.parent, local->loc2.name, + (char 
**)&tmp_loc.path); + if (ret < 0) { + gf_msg (this->name, GF_LOG_ERROR, 0, + SHARD_MSG_INODE_PATH_FAILED, "Inode path failed" + " on pargfid=%s bname=%s", + uuid_utoa (tmp_loc.parent->gfid), + local->loc2.name); + local->op_ret = -1; + local->op_errno = ENOMEM; + goto err; + } - if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) { - STACK_WIND (frame, default_unlink_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->unlink, loc, xflag, xdata); - return 0; + tmp_loc.name = strrchr (tmp_loc.path, '/'); + if (tmp_loc.name) + tmp_loc.name++; + dst_loc = &tmp_loc; + } else { + dst_loc = &local->loc2; } - local = mem_get0 (this->local_pool); - if (!local) - goto err; - - frame->local = local; - - loc_copy (&local->loc, loc); - local->xflag = xflag; - local->xattr_req = (xdata) ? dict_ref (xdata) : dict_new (); - local->block_size = block_size; - local->resolver_base_inode = loc->inode; - local->fop = GF_FOP_UNLINK; - if (!this->itable) - this->itable = (local->loc.inode)->table; - - shard_lookup_base_file (frame, this, &local->loc, - shard_post_lookup_unlink_handler); + /* To-Do: Request open-fd count on dst base file */ + STACK_WIND (frame, shard_rename_src_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->rename, &local->loc, dst_loc, + local->xattr_req); + loc_wipe (&tmp_loc); return 0; err: - SHARD_STACK_UNWIND (unlink, frame, -1, ENOMEM, NULL, NULL, NULL); + loc_wipe (&tmp_loc); + shard_common_failure_unwind (local->fop, frame, local->op_ret, + local->op_errno); return 0; - } int -shard_rename_cbk (call_frame_t *frame, xlator_t *this) +shard_unlink_base_file (call_frame_t *frame, xlator_t *this); + +int +shard_set_size_attrs_on_marker_file_cbk (call_frame_t *frame, void *cookie, + xlator_t *this, int32_t op_ret, + int32_t op_errno, dict_t *dict, + dict_t *xdata) { + shard_priv_t *priv = NULL; shard_local_t *local = NULL; + priv = this->private; local = frame->local; + if (op_ret < 0) { + gf_msg (this->name, GF_LOG_ERROR, op_errno, + SHARD_MSG_FOP_FAILED, 
"Xattrop on marker file failed " + "while performing %s; entry gfid=%s", + gf_fop_string (local->fop), local->newloc.name); + goto err; + } - SHARD_STACK_UNWIND (rename, frame, local->op_ret, local->op_errno, - &local->prebuf, &local->preoldparent, - &local->postoldparent, &local->prenewparent, - &local->postnewparent, local->xattr_rsp); + inode_unlink (local->newloc.inode, priv->dot_shard_rm_inode, + local->newloc.name); + + if (local->fop == GF_FOP_UNLINK) + shard_unlink_base_file (frame, this); + else if (local->fop == GF_FOP_RENAME) + shard_rename_src_base_file (frame, this); + return 0; +err: + shard_common_failure_unwind (local->fop, frame, op_ret, op_errno); return 0; } int -shard_rename_unlink_dst_shards_do (call_frame_t *frame, xlator_t *this) +shard_set_size_attrs_on_marker_file (call_frame_t *frame, xlator_t *this) { - int ret = -1; - uint32_t link_count = 0; - shard_local_t *local = NULL; - shard_priv_t *priv = NULL; + int op_errno = ENOMEM; + uint64_t bs = 0; + dict_t *xdata = NULL; + shard_local_t *local = NULL; local = frame->local; - priv = this->private; - - local->first_block = get_lowest_block (0, local->dst_block_size); - local->last_block = get_highest_block (0, local->postbuf.ia_size, - local->dst_block_size); - local->num_blocks = local->last_block - local->first_block + 1; - local->resolver_base_inode = local->loc2.inode; + xdata = dict_new (); + if (!xdata) + goto err; - if ((local->xattr_rsp) && - (!dict_get_uint32 (local->xattr_rsp, GET_LINK_COUNT, &link_count)) - && (link_count > 1)) { - shard_rename_cbk (frame, this); - return 0; + if (local->fop == GF_FOP_UNLINK) + bs = local->block_size; + else if (local->fop == GF_FOP_RENAME) + bs = local->dst_block_size; + SHARD_INODE_CREATE_INIT (this, bs, xdata, &local->newloc, + local->prebuf.ia_size, 0, err); + STACK_WIND (frame, shard_set_size_attrs_on_marker_file_cbk, + FIRST_CHILD(this), FIRST_CHILD(this)->fops->xattrop, + &local->newloc, GF_XATTROP_GET_AND_SET, xdata, NULL); + dict_unref 
(xdata); + return 0; +err: + if (xdata) + dict_unref (xdata); + shard_common_failure_unwind (local->fop, frame, -1, op_errno); + return 0; +} + +int +shard_lookup_marker_file_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode, + struct iatt *buf, dict_t *xdata, + struct iatt *postparent) +{ + inode_t *linked_inode = NULL; + shard_priv_t *priv = NULL; + shard_local_t *local = NULL; + + local = frame->local; + priv = this->private; + + if (op_ret < 0) { + gf_msg (this->name, GF_LOG_ERROR, op_errno, + SHARD_MSG_FOP_FAILED, "Lookup on marker file failed " + "while performing %s; entry gfid=%s", + gf_fop_string (local->fop), local->newloc.name); + goto err; } - if (local->num_blocks == 1) { - shard_rename_cbk (frame, this); + linked_inode = inode_link (inode, priv->dot_shard_rm_inode, + local->newloc.name, buf); + inode_unref (local->newloc.inode); + local->newloc.inode = linked_inode; + shard_set_size_attrs_on_marker_file (frame, this); + return 0; +err: + shard_common_failure_unwind (local->fop, frame, op_ret, op_errno); + return 0; +} + +int +shard_lookup_marker_file (call_frame_t *frame, xlator_t *this) +{ + int op_errno = ENOMEM; + dict_t *xattr_req = NULL; + shard_local_t *local = NULL; + + local = frame->local; + + xattr_req = shard_create_gfid_dict (local->xattr_req); + if (!xattr_req) + goto err; + + STACK_WIND (frame, shard_lookup_marker_file_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->lookup, &local->newloc, xattr_req); + dict_unref (xattr_req); + return 0; +err: + if (xattr_req) + dict_unref (xattr_req); + shard_common_failure_unwind (local->fop, frame, -1, op_errno); + return 0; +} + +int +shard_create_marker_file_under_remove_me_cbk (call_frame_t *frame, void *cookie, + xlator_t *this, int32_t op_ret, + int32_t op_errno, inode_t *inode, + struct iatt *buf, + struct iatt *preparent, + struct iatt *postparent, + dict_t *xdata) +{ + inode_t *linked_inode = NULL; + shard_priv_t *priv = NULL; + 
shard_local_t *local = NULL; + + local = frame->local; + priv = this->private; + + SHARD_UNSET_ROOT_FS_ID (frame, local); + if (op_ret < 0) { + if ((op_errno != EEXIST) && (op_errno != ENODATA)) { + local->op_ret = op_ret; + local->op_errno = op_errno; + gf_msg (this->name, GF_LOG_ERROR, op_errno, + SHARD_MSG_FOP_FAILED, "Marker file creation " + "failed while performing %s; entry gfid=%s", + gf_fop_string (local->fop), local->newloc.name); + goto err; + } else { + shard_lookup_marker_file (frame, this); + return 0; + } + } + + linked_inode = inode_link (inode, priv->dot_shard_rm_inode, + local->newloc.name, buf); + inode_unref (local->newloc.inode); + local->newloc.inode = linked_inode; + + if (local->fop == GF_FOP_UNLINK) + shard_unlink_base_file (frame, this); + else if (local->fop == GF_FOP_RENAME) + shard_rename_src_base_file (frame, this); + return 0; +err: + shard_common_failure_unwind (local->fop, frame, -1, local->op_errno); + return 0; +} + +int +shard_create_marker_file_under_remove_me (call_frame_t *frame, xlator_t *this, + loc_t *loc) +{ + int ret = 0; + int op_errno = ENOMEM; + uint64_t bs = 0; + char g1[64] = {0,}; + char g2[64] = {0,}; + dict_t *xattr_req = NULL; + shard_priv_t *priv = NULL; + shard_local_t *local = NULL; + + priv = this->private; + local = frame->local; + + SHARD_SET_ROOT_FS_ID (frame, local); + + xattr_req = shard_create_gfid_dict (local->xattr_req); + if (!xattr_req) + goto err; + + local->newloc.inode = inode_new (this->itable); + local->newloc.parent = inode_ref (priv->dot_shard_rm_inode); + ret = inode_path (local->newloc.parent, uuid_utoa (loc->inode->gfid), + (char **)&local->newloc.path); + if (ret < 0) { + gf_msg (this->name, GF_LOG_ERROR, 0, + SHARD_MSG_INODE_PATH_FAILED, "Inode path failed on " + "pargfid=%s bname=%s", + uuid_utoa_r (priv->dot_shard_rm_gfid, g1), + uuid_utoa_r (loc->inode->gfid, g2)); + goto err; + } + local->newloc.name = strrchr (local->newloc.path, '/'); + if (local->newloc.name) + 
local->newloc.name++; + + if (local->fop == GF_FOP_UNLINK) + bs = local->block_size; + else if (local->fop == GF_FOP_RENAME) + bs = local->dst_block_size; + + SHARD_INODE_CREATE_INIT (this, bs, xattr_req, &local->newloc, + local->prebuf.ia_size, 0, err); + + STACK_WIND (frame, shard_create_marker_file_under_remove_me_cbk, + FIRST_CHILD(this), FIRST_CHILD(this)->fops->mknod, + &local->newloc, 0, 0, 0644, xattr_req); + dict_unref (xattr_req); + return 0; + +err: + if (xattr_req) + dict_unref (xattr_req); + shard_create_marker_file_under_remove_me_cbk (frame, 0, this, -1, + op_errno, NULL, NULL, + NULL, NULL, NULL); + return 0; +} + +int +shard_unlock_entrylk (call_frame_t *frame, xlator_t *this); + +int +shard_unlink_base_file_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, + struct iatt *preparent, struct iatt *postparent, + dict_t *xdata) +{ + int ret = 0; + shard_local_t *local = NULL; + + local = frame->local; + + if (op_ret < 0) { + local->op_ret = op_ret; + local->op_errno = op_errno; + } else { + local->preoldparent = *preparent; + local->postoldparent = *postparent; + if (xdata) + local->xattr_rsp = dict_ref (xdata); + } + if (local->entrylk_frame) { + ret = shard_unlock_entrylk (frame, this); + if (ret < 0) { + local->op_ret = -1; + local->op_errno = -ret; + } + } + + ret = shard_unlock_inodelk (frame, this); + if (ret < 0) { + local->op_ret = -1; + local->op_errno = -ret; + } + shard_unlink_cbk (frame, this); + return 0; +} + +int +shard_unlink_base_file (call_frame_t *frame, xlator_t *this) +{ + shard_local_t *local = frame->local; + + /* To-Do: Request open-fd count on base file */ + STACK_WIND (frame, shard_unlink_base_file_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->unlink, &local->loc, local->xflag, + local->xattr_req); + return 0; +} + +int +shard_unlock_entrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) +{ + if (op_ret) + gf_msg 
(this->name, GF_LOG_ERROR, op_errno, SHARD_MSG_FOP_FAILED, + "Unlock failed. Please check brick logs for " + "more details"); + SHARD_STACK_DESTROY (frame); + return 0; +} + +int +shard_unlock_entrylk (call_frame_t *frame, xlator_t *this) +{ + loc_t *loc = NULL; + call_frame_t *lk_frame = NULL; + shard_local_t *local = NULL; + shard_local_t *lk_local = NULL; + shard_entrylk_t *lock = NULL; + + local = frame->local; + lk_frame = local->entrylk_frame; + lk_local = lk_frame->local; + local->entrylk_frame = NULL; + lock = &lk_local->int_entrylk; + loc = &lock->loc; + + STACK_WIND (lk_frame, shard_unlock_entrylk_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->entrylk, this->name, loc, + lk_local->int_entrylk.basename, ENTRYLK_UNLOCK, + ENTRYLK_WRLCK, NULL); + local->int_entrylk.acquired_lock = _gf_false; + return 0; +} + +int +shard_post_entrylk_fop_handler (call_frame_t *frame, xlator_t *this) +{ + shard_local_t *local = NULL; + + local = frame->local; + + switch (local->fop) { + case GF_FOP_UNLINK: + case GF_FOP_RENAME: + shard_create_marker_file_under_remove_me (frame, this, + &local->int_inodelk.loc); + break; + default: + gf_msg (this->name, GF_LOG_WARNING, 0, SHARD_MSG_INVALID_FOP, + "post-entrylk handler not defined. 
This case should not" + " be hit"); + break; + } + return 0; +} + +int +shard_acquire_entrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) +{ + call_frame_t *main_frame = NULL; + shard_local_t *local = NULL; + shard_local_t *main_local = NULL; + + local = frame->local; + main_frame = local->main_frame; + main_local = main_frame->local; + + if (op_ret < 0) { /* fop status arrives in op_ret; the lock frame's local is only zero-filled */ + shard_common_failure_unwind (main_local->fop, main_frame, + op_ret, op_errno); return 0; } + main_local->int_entrylk.acquired_lock = _gf_true; + shard_post_entrylk_fop_handler (main_frame, this); + return 0; +} - local->inode_list = GF_CALLOC (local->num_blocks, sizeof (inode_t *), - gf_shard_mt_inode_list); - if (!local->inode_list) - goto out; +int +shard_acquire_entrylk (call_frame_t *frame, xlator_t *this, inode_t *inode, + uuid_t gfid) +{ + char gfid_str[GF_UUID_BUF_SIZE] = {0,}; + shard_local_t *local = frame->local; /* set up front: the err: path reads local->fop */ + shard_local_t *entrylk_local = NULL; + shard_entrylk_t *int_entrylk = NULL; + call_frame_t *entrylk_frame = NULL; - local->dot_shard_loc.inode = inode_find (this->itable, - priv->dot_shard_gfid); - if (!local->dot_shard_loc.inode) { - ret = shard_init_internal_dir_loc (this, local, - SHARD_INTERNAL_DIR_DOT_SHARD); - if (ret) - goto out; - shard_lookup_internal_dir (frame, this, - shard_post_resolve_unlink_handler, - SHARD_INTERNAL_DIR_DOT_SHARD); + entrylk_frame = create_frame (this, this->ctx->pool); + if (!entrylk_frame) { + gf_msg (this->name, GF_LOG_WARNING, ENOMEM, + SHARD_MSG_MEMALLOC_FAILED, "Failed to create new frame " + "to lock marker file"); + goto err; + } + + entrylk_local = mem_get0 (this->local_pool); + if (!entrylk_local) { + STACK_DESTROY (entrylk_frame->root); + goto err; + } + + local = frame->local; + entrylk_frame->local = entrylk_local; + entrylk_local->main_frame = frame; + int_entrylk = &entrylk_local->int_entrylk; + + int_entrylk->loc.inode = inode_ref (inode); + set_lk_owner_from_ptr
(&entrylk_frame->root->lk_owner, + entrylk_frame->root); + local->entrylk_frame = entrylk_frame; + gf_uuid_unparse (gfid, gfid_str); + int_entrylk->basename = gf_strdup (gfid_str); + + STACK_WIND (entrylk_frame, shard_acquire_entrylk_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->entrylk, this->name, + &int_entrylk->loc, int_entrylk->basename, ENTRYLK_LOCK, + ENTRYLK_WRLCK, NULL); + return 0; +err: + shard_common_failure_unwind (local->fop, frame, -1, ENOMEM); + return 0; +} + +int +shard_post_lookup_base_shard_rm_handler (call_frame_t *frame, xlator_t *this) +{ + shard_local_t *local = NULL; + shard_priv_t *priv = NULL; + + priv = this->private; + local = frame->local; + + if (local->op_ret < 0) { + shard_common_failure_unwind (local->fop, frame, -1, + local->op_errno); + return 0; + } + + if (local->prebuf.ia_nlink > 1) { + gf_msg_debug (this->name, 0, "link count on %s > 1:%d, " + "performing rename()/unlink()", + local->int_inodelk.loc.path, local->prebuf.ia_nlink); + if (local->fop == GF_FOP_RENAME) + shard_rename_src_base_file (frame, this); + else if (local->fop == GF_FOP_UNLINK) + shard_unlink_base_file (frame, this); } else { - local->post_res_handler = shard_post_resolve_unlink_handler; - shard_refresh_internal_dir (frame, this, - SHARD_INTERNAL_DIR_DOT_SHARD); + gf_msg_debug (this->name, 0, "link count on %s = 1, creating " + "file under .remove_me", local->int_inodelk.loc.path); + shard_acquire_entrylk (frame, this, priv->dot_shard_rm_inode, + local->prebuf.ia_gfid); } + return 0; +} +int +shard_post_inodelk_fop_handler (call_frame_t *frame, xlator_t *this) +{ + shard_local_t *local = NULL; + + local = frame->local; + + switch (local->fop) { + case GF_FOP_UNLINK: + case GF_FOP_RENAME: + shard_lookup_base_file (frame, this, &local->int_inodelk.loc, + shard_post_lookup_base_shard_rm_handler); + break; + default: + gf_msg (this->name, GF_LOG_WARNING, 0, SHARD_MSG_INVALID_FOP, + "post-inodelk handler not defined. 
This case should not" + " be hit"); + break; + } return 0; +} -out: - SHARD_STACK_UNWIND (rename, frame, -1, ENOMEM, NULL, NULL, NULL, NULL, - NULL, NULL); +int +shard_acquire_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) +{ + call_frame_t *main_frame = NULL; + shard_local_t *local = NULL; + shard_local_t *main_local = NULL; + + local = frame->local; + main_frame = local->main_frame; + main_local = main_frame->local; + + if (op_ret < 0) { /* fop status arrives in op_ret; the lock frame's local is only zero-filled */ + shard_common_failure_unwind (main_local->fop, main_frame, + op_ret, op_errno); + return 0; + } + main_local->int_inodelk.acquired_lock = _gf_true; + shard_post_inodelk_fop_handler (main_frame, this); return 0; } int -shard_post_rename_lookup_handler (call_frame_t *frame, xlator_t *this) +shard_acquire_inodelk (call_frame_t *frame, xlator_t *this, loc_t *loc) +{ + call_frame_t *lk_frame = NULL; + shard_local_t *local = frame->local; /* set up front: the err: path reads local->fop */ + shard_local_t *lk_local = NULL; + shard_inodelk_t *int_inodelk = NULL; + + lk_frame = create_frame (this, this->ctx->pool); + if (!lk_frame) { + gf_msg (this->name, GF_LOG_WARNING, ENOMEM, + SHARD_MSG_MEMALLOC_FAILED, "Failed to create new frame " + "to lock base shard"); + goto err; + } + lk_local = mem_get0 (this->local_pool); + if (!lk_local) { + STACK_DESTROY (lk_frame->root); + goto err; + } + + local = frame->local; + lk_frame->local = lk_local; + lk_local->main_frame = frame; + int_inodelk = &lk_local->int_inodelk; + + int_inodelk->flock.l_len = 0; + int_inodelk->flock.l_start = 0; + int_inodelk->domain = this->name; + int_inodelk->flock.l_type = F_WRLCK; + loc_copy (&local->int_inodelk.loc, loc); + set_lk_owner_from_ptr (&lk_frame->root->lk_owner, lk_frame->root); + local->inodelk_frame = lk_frame; + + STACK_WIND (lk_frame, shard_acquire_inodelk_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->inodelk, int_inodelk->domain, + &local->int_inodelk.loc, F_SETLKW, &int_inodelk->flock, NULL); + return 0; +err: +
shard_common_failure_unwind (local->fop, frame, -1, ENOMEM); + return 0; +} + +int +shard_post_mkdir_rm_handler (call_frame_t *frame, xlator_t *this) { + loc_t *loc = NULL; shard_local_t *local = NULL; local = frame->local; if (local->op_ret < 0) { - SHARD_STACK_UNWIND (rename, frame, local->op_ret, - local->op_errno, NULL, NULL, NULL, NULL, - NULL, NULL); + shard_common_failure_unwind (local->fop, frame, -1, + local->op_errno); return 0; } + if (local->fop == GF_FOP_UNLINK) + loc = &local->loc; + else if (local->fop == GF_FOP_RENAME) + loc = &local->loc2; + shard_acquire_inodelk (frame, this, loc); + return 0; +} - if (local->dst_block_size) - shard_rename_unlink_dst_shards_do (frame, this); - else - shard_rename_cbk (frame, this); +int +shard_mkdir_internal_dir (call_frame_t *frame, xlator_t *this, + shard_post_resolve_fop_handler_t handler, + shard_internal_dir_type_t type); +int +shard_pre_mkdir_rm_handler (call_frame_t *frame, xlator_t *this) +{ + shard_local_t *local = NULL; + + local = frame->local; + if (local->op_ret < 0) { + shard_common_failure_unwind (local->fop, frame, -1, + local->op_errno); + return 0; + } + shard_mkdir_internal_dir (frame, this, shard_post_mkdir_rm_handler, + SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME); + return 0; +} + +void +shard_begin_rm_resolution (call_frame_t *frame, xlator_t *this) +{ + shard_priv_t *priv = NULL; + shard_local_t *local = NULL; + + priv = this->private; + local = frame->local; + + local->dot_shard_rm_loc.inode = inode_find (this->itable, + priv->dot_shard_rm_gfid); + if (!local->dot_shard_rm_loc.inode) { + local->dot_shard_loc.inode = inode_find (this->itable, + priv->dot_shard_gfid); + if (!local->dot_shard_loc.inode) { + shard_mkdir_internal_dir (frame, this, + shard_pre_mkdir_rm_handler, + SHARD_INTERNAL_DIR_DOT_SHARD); + } else { + local->post_res_handler = shard_pre_mkdir_rm_handler; + shard_refresh_internal_dir (frame, this, + SHARD_INTERNAL_DIR_DOT_SHARD); + } + } else { + local->post_res_handler = 
shard_post_mkdir_rm_handler; + shard_refresh_internal_dir (frame, this, + SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME); + } +} + +int +shard_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag, + dict_t *xdata) +{ + int ret = -1; + uint64_t block_size = 0; + shard_local_t *local = NULL; + + ret = shard_inode_ctx_get_block_size (loc->inode, this, &block_size); + if ((ret) && (!IA_ISLNK(loc->inode->ia_type))) { + gf_msg (this->name, GF_LOG_ERROR, 0, + SHARD_MSG_INODE_CTX_GET_FAILED, "Failed to get block " + "size from inode ctx of %s", + uuid_utoa (loc->inode->gfid)); + goto err; + } + + if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) { + STACK_WIND (frame, default_unlink_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->unlink, loc, xflag, xdata); + return 0; + } + + local = mem_get0 (this->local_pool); + if (!local) + goto err; + + frame->local = local; + + loc_copy (&local->loc, loc); + local->xflag = xflag; + local->xattr_req = (xdata) ? dict_ref (xdata) : dict_new (); + local->block_size = block_size; + local->resolver_base_inode = loc->inode; + local->fop = GF_FOP_UNLINK; + if (!this->itable) + this->itable = (local->loc.inode)->table; + + local->resolve_not = _gf_true; + shard_begin_rm_resolution (frame, this); + return 0; +err: + shard_common_failure_unwind (GF_FOP_UNLINK, frame, -1, ENOMEM); + return 0; +} + +int +shard_rename_cbk (call_frame_t *frame, xlator_t *this) +{ + shard_local_t *local = NULL; + + local = frame->local; + + SHARD_STACK_UNWIND (rename, frame, local->op_ret, local->op_errno, + &local->prebuf, &local->preoldparent, + &local->postoldparent, &local->prenewparent, + &local->postnewparent, local->xattr_rsp); + return 0; +} + +int +shard_post_rename_lookup_handler (call_frame_t *frame, xlator_t *this) +{ + shard_rename_cbk (frame, this); return 0; } @@ -3226,6 +3815,7 @@ shard_rename_src_cbk (call_frame_t *frame, void *cookie, xlator_t *this, struct iatt *prenewparent, struct iatt *postnewparent, dict_t *xdata) { + int 
ret = 0; shard_local_t *local = NULL; local = frame->local; @@ -3235,6 +3825,11 @@ shard_rename_src_cbk (call_frame_t *frame, void *cookie, xlator_t *this, local->op_errno = op_errno; goto err; } + /* Set ctx->refresh to TRUE to force a lookup on disk when + * shard_lookup_base_file() is called next to refresh the hard link + * count in ctx + */ + shard_inode_ctx_set_refresh_flag (local->int_inodelk.loc.inode, this); local->prebuf = *buf; local->preoldparent = *preoldparent; @@ -3244,40 +3839,37 @@ shard_rename_src_cbk (call_frame_t *frame, void *cookie, xlator_t *this, if (xdata) local->xattr_rsp = dict_ref (xdata); - /* Now the base file is looked up to gather the ia_size and ia_blocks.*/ + if (local->dst_block_size) { + if (local->entrylk_frame) { + ret = shard_unlock_entrylk (frame, this); + if (ret < 0) { + local->op_ret = -1; + local->op_errno = -ret; + } + } + ret = shard_unlock_inodelk (frame, this); + if (ret < 0) { + local->op_ret = -1; + local->op_errno = -ret; + goto err; + } + } + + /* Now the base file of src, if sharded, is looked up to gather ia_size + * and ia_blocks.*/ if (local->block_size) { local->tmp_loc.inode = inode_new (this->itable); gf_uuid_copy (local->tmp_loc.gfid, (local->loc.inode)->gfid); shard_lookup_base_file (frame, this, &local->tmp_loc, shard_post_rename_lookup_handler); } else { - shard_rename_unlink_dst_shards_do (frame, this); + shard_rename_cbk (frame, this); } - return 0; err: - SHARD_STACK_UNWIND (rename, frame, local->op_ret, local->op_errno, NULL, - NULL, NULL, NULL, NULL, NULL); - return 0; -} - -int -shard_rename_src_base_file (call_frame_t *frame, xlator_t *this) -{ - shard_local_t *local = NULL; - - local = frame->local; - - if (dict_set_uint32 (local->xattr_req, GET_LINK_COUNT, 0)) - gf_msg (this->name, GF_LOG_WARNING, 0, - SHARD_MSG_DICT_SET_FAILED, "Failed to set " - GET_LINK_COUNT" in dict"); - - /* To-Do: Request open-fd count on dst base file */ - STACK_WIND (frame, shard_rename_src_cbk, FIRST_CHILD(this), - 
FIRST_CHILD(this)->fops->rename, &local->loc, &local->loc2, - local->xattr_req); + shard_common_failure_unwind (local->fop, frame, local->op_ret, + local->op_errno); return 0; } @@ -3289,9 +3881,8 @@ shard_post_lookup_dst_base_file_handler (call_frame_t *frame, xlator_t *this) local = frame->local; if (local->op_ret < 0) { - SHARD_STACK_UNWIND (rename, frame, local->op_ret, - local->op_errno, NULL, NULL, NULL, NULL, - NULL, NULL); + shard_common_failure_unwind (local->fop, frame, local->op_ret, + local->op_errno); return 0; } @@ -3332,6 +3923,7 @@ shard_rename (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, if (newloc->inode) ret = shard_inode_ctx_get_block_size (newloc->inode, this, &dst_block_size); + /* The following stack_wind covers the case where: * a. the src file is not sharded and dst doesn't exist, OR * b. the src and dst both exist but are not sharded. @@ -3361,26 +3953,26 @@ shard_rename (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, local->dst_block_size = dst_block_size; if (!this->itable) this->itable = (local->loc.inode)->table; + local->resolve_not = _gf_true; - if (local->dst_block_size) - /* The if block covers the case where the dst file exists and is - * sharded. So it is important to look up this inode, record its - * size, before renaming src to dst, so as to NOT lose this - * information. - */ - shard_lookup_base_file (frame, this, &local->loc2, - shard_post_lookup_dst_base_file_handler); - else - /* The following block covers the case where the dst either - * doesn't exist or is NOT sharded. In this case, shard xlator - * would go ahead and rename src to dst. - */ + /* The following if-block covers the case where the dst file exists + * and is sharded. + */ + if (local->dst_block_size) { + shard_begin_rm_resolution (frame, this); + } else { + /* The following block covers the case where the dst either doesn't + * exist or is NOT sharded but the src is sharded. 
In this case, shard + * xlator would go ahead and rename src to dst. Once done, it would also + * lookup the base shard of src to get the ia_size and ia_blocks xattr + * values. + */ shard_rename_src_base_file (frame, this); + } return 0; err: - SHARD_STACK_UNWIND (rename, frame, -1, ENOMEM, NULL, NULL, NULL, - NULL, NULL, NULL); + shard_common_failure_unwind (GF_FOP_RENAME, frame, -1, ENOMEM); return 0; } @@ -3400,8 +3992,8 @@ shard_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this, if (op_ret == -1) goto unwind; - ret = shard_inode_ctx_set (inode, this, stbuf, - ntoh64 (local->block_size), SHARD_ALL_MASK); + ret = shard_inode_ctx_set (inode, this, stbuf, local->block_size, + SHARD_ALL_MASK); if (ret) gf_msg (this->name, GF_LOG_WARNING, 0, SHARD_MSG_INODE_CTX_SET_FAILED, "Failed to set inode " @@ -3417,28 +4009,29 @@ int shard_create (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata) { + shard_priv_t *priv = NULL; shard_local_t *local = NULL; + priv = this->private; local = mem_get0 (this->local_pool); if (!local) goto err; frame->local = local; + local->block_size = priv->block_size; if (!__is_gsyncd_on_shard_dir (frame, loc)) { - SHARD_INODE_CREATE_INIT (this, local, xdata, loc, err); + SHARD_INODE_CREATE_INIT (this, local->block_size, xdata, loc, 0, + 0, err); } STACK_WIND (frame, shard_create_cbk, FIRST_CHILD (this), FIRST_CHILD(this)->fops->create, loc, flags, mode, umask, fd, xdata); return 0; - err: - SHARD_STACK_UNWIND (create, frame, -1, ENOMEM, NULL, NULL, NULL, - NULL, NULL, NULL); + shard_common_failure_unwind (GF_FOP_CREATE, frame, -1, ENOMEM); return 0; - } int @@ -3523,9 +4116,9 @@ out: if (call_count == 0) { SHARD_UNSET_ROOT_FS_ID (frame, local); if (local->op_ret < 0) { - SHARD_STACK_UNWIND (readv, frame, local->op_ret, - local->op_errno, NULL, 0, NULL, - NULL, NULL); + shard_common_failure_unwind (GF_FOP_READ, frame, + local->op_ret, + local->op_errno); } else { if 
(xdata) local->xattr_rsp = dict_ref (xdata); @@ -3792,8 +4385,8 @@ shard_post_lookup_shards_readv_handler (call_frame_t *frame, xlator_t *this) local = frame->local; if (local->op_ret < 0) { - SHARD_STACK_UNWIND (readv, frame, local->op_ret, - local->op_errno, NULL, 0, NULL, NULL, NULL); + shard_common_failure_unwind (GF_FOP_READ, frame, local->op_ret, + local->op_errno); return 0; } @@ -3815,8 +4408,8 @@ shard_post_mknod_readv_handler (call_frame_t *frame, xlator_t *this) local = frame->local; if (local->op_ret < 0) { - SHARD_STACK_UNWIND (readv, frame, local->op_ret, - local->op_errno, NULL, 0, NULL, NULL, NULL); + shard_common_failure_unwind (GF_FOP_READ, frame, local->op_ret, + local->op_errno); return 0; } @@ -3839,9 +4432,9 @@ shard_post_resolve_readv_handler (call_frame_t *frame, xlator_t *this) if (local->op_ret < 0) { if (local->op_errno != ENOENT) { - SHARD_STACK_UNWIND (readv, frame, local->op_ret, - local->op_errno, NULL, 0, NULL, - NULL, NULL); + shard_common_failure_unwind (GF_FOP_READ, frame, + local->op_ret, + local->op_errno); return 0; } else { struct iovec vec = {0,}; @@ -3878,8 +4471,8 @@ shard_post_lookup_readv_handler (call_frame_t *frame, xlator_t *this) local = frame->local; if (local->op_ret < 0) { - SHARD_STACK_UNWIND (readv, frame, local->op_ret, - local->op_errno, NULL, 0, NULL, NULL, NULL); + shard_common_failure_unwind (GF_FOP_READ, frame, local->op_ret, + local->op_errno); return 0; } @@ -3955,10 +4548,8 @@ shard_post_lookup_readv_handler (call_frame_t *frame, xlator_t *this) SHARD_INTERNAL_DIR_DOT_SHARD); } return 0; - err: - SHARD_STACK_UNWIND (readv, frame, -1, ENOMEM, NULL, 0, NULL, NULL, - NULL); + shard_common_failure_unwind (GF_FOP_READ, frame, -1, ENOMEM); return 0; } @@ -4018,8 +4609,7 @@ shard_readv (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, shard_post_lookup_readv_handler); return 0; err: - SHARD_STACK_UNWIND (readv, frame, -1, ENOMEM, NULL, 0, NULL, NULL, - NULL); + shard_common_failure_unwind 
(GF_FOP_READ, frame, -1, ENOMEM); return 0; } @@ -4032,9 +4622,8 @@ shard_common_inode_write_post_update_size_handler (call_frame_t *frame, local = frame->local; if (local->op_ret < 0) { - shard_common_inode_write_failure_unwind (local->fop, frame, - local->op_ret, - local->op_errno); + shard_common_failure_unwind (local->fop, frame, local->op_ret, + local->op_errno); } else { shard_common_inode_write_success_unwind (local->fop, frame, local->written_size); @@ -4139,9 +4728,8 @@ shard_common_inode_write_do_cbk (call_frame_t *frame, void *cookie, if (call_count == 0) { SHARD_UNSET_ROOT_FS_ID (frame, local); if (local->op_ret < 0) { - shard_common_inode_write_failure_unwind (fop, frame, - local->op_ret, - local->op_errno); + shard_common_failure_unwind (fop, frame, local->op_ret, + local->op_errno); } else { shard_get_delta_size_from_inode_ctx (local, local->fd->inode, @@ -4343,9 +4931,8 @@ shard_common_inode_write_post_lookup_shards_handler (call_frame_t *frame, local = frame->local; if (local->op_ret < 0) { - shard_common_inode_write_failure_unwind (local->fop, frame, - local->op_ret, - local->op_errno); + shard_common_failure_unwind (local->fop, frame, local->op_ret, + local->op_errno); return 0; } @@ -4368,9 +4955,8 @@ shard_common_inode_write_post_mknod_handler (call_frame_t *frame, local = frame->local; if (local->op_ret < 0) { - shard_common_inode_write_failure_unwind (local->fop, frame, - local->op_ret, - local->op_errno); + shard_common_failure_unwind (local->fop, frame, local->op_ret, + local->op_errno); return 0; } @@ -4386,10 +4972,6 @@ shard_common_inode_write_post_mknod_handler (call_frame_t *frame, } int -shard_mkdir_internal_dir (call_frame_t *frame, xlator_t *this, - shard_post_resolve_fop_handler_t handler, - shard_internal_dir_type_t type); -int shard_common_inode_write_post_resolve_handler (call_frame_t *frame, xlator_t *this) { @@ -4398,9 +4980,8 @@ shard_common_inode_write_post_resolve_handler (call_frame_t *frame, local = frame->local; if 
(local->op_ret < 0) { - shard_common_inode_write_failure_unwind (local->fop, frame, - local->op_ret, - local->op_errno); + shard_common_failure_unwind (local->fop, frame, local->op_ret, + local->op_errno); return 0; } @@ -4423,9 +5004,8 @@ shard_common_inode_write_post_lookup_handler (call_frame_t *frame, shard_priv_t *priv = this->private; if (local->op_ret < 0) { - shard_common_inode_write_failure_unwind (local->fop, frame, - local->op_ret, - local->op_errno); + shard_common_failure_unwind (local->fop, frame, local->op_ret, + local->op_errno); return 0; } @@ -4443,8 +5023,7 @@ shard_common_inode_write_post_lookup_handler (call_frame_t *frame, local->inode_list = GF_CALLOC (local->num_blocks, sizeof (inode_t *), gf_shard_mt_inode_list); if (!local->inode_list) { - shard_common_inode_write_failure_unwind (local->fop, frame, - -1, ENOMEM); + shard_common_failure_unwind (local->fop, frame, -1, ENOMEM); return 0; } @@ -4508,7 +5087,7 @@ shard_mkdir_internal_dir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, if (link_inode != inode) { shard_refresh_internal_dir (frame, this, type); } else { - shard_inode_ctx_set_refreshed_flag (link_inode, this); + shard_inode_ctx_mark_dir_refreshed (link_inode, this); shard_common_resolve_shards (frame, this, local->post_res_handler); } @@ -4544,6 +5123,10 @@ shard_mkdir_internal_dir (call_frame_t *frame, xlator_t *this, gf_uuid_copy (*gfid, priv->dot_shard_gfid); loc = &local->dot_shard_loc; break; + case SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME: + gf_uuid_copy (*gfid, priv->dot_shard_rm_gfid); + loc = &local->dot_shard_rm_loc; + break; default: break; } @@ -4702,8 +5285,8 @@ out: return 0; if (local->op_ret < 0) { - SHARD_STACK_UNWIND (fsync, frame, local->op_ret, - local->op_errno, NULL, NULL, NULL); + shard_common_failure_unwind (GF_FOP_FSYNC, frame, local->op_ret, + local->op_errno); } else { shard_get_timestamps_from_inode_ctx (local, base_inode, this); SHARD_STACK_UNWIND (fsync, frame, local->op_ret, @@ -4733,8 +5316,8 
@@ shard_post_lookup_fsync_handler (call_frame_t *frame, xlator_t *this) INIT_LIST_HEAD (&copy); if (local->op_ret < 0) { - SHARD_STACK_UNWIND (fsync, frame, local->op_ret, - local->op_errno, NULL, NULL, NULL); + shard_common_failure_unwind (GF_FOP_FSYNC, frame, local->op_ret, + local->op_errno); return 0; } @@ -4847,7 +5430,7 @@ shard_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync, shard_post_lookup_fsync_handler); return 0; err: - SHARD_STACK_UNWIND (fsync, frame, -1, ENOMEM, NULL, NULL, NULL); + shard_common_failure_unwind (GF_FOP_FSYNC, frame, -1, ENOMEM); return 0; } @@ -5069,9 +5652,8 @@ shard_removexattr (call_frame_t *frame, xlator_t *this, loc_t *loc, FIRST_CHILD(this)->fops->removexattr, loc, name, xdata); return 0; - out: - SHARD_STACK_UNWIND (removexattr, frame, -1, op_errno, NULL); + shard_common_failure_unwind (GF_FOP_REMOVEXATTR, frame, -1, op_errno); return 0; } @@ -5095,9 +5677,8 @@ shard_fremovexattr (call_frame_t *frame, xlator_t *this, fd_t *fd, FIRST_CHILD(this)->fops->fremovexattr, fd, name, xdata); return 0; - out: - SHARD_STACK_UNWIND (fremovexattr, frame, -1, op_errno, NULL); + shard_common_failure_unwind (GF_FOP_FREMOVEXATTR, frame, -1, op_errno); return 0; } @@ -5135,9 +5716,8 @@ shard_fgetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd, STACK_WIND (frame, shard_fgetxattr_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->fgetxattr, fd, name, xdata); return 0; - out: - SHARD_STACK_UNWIND (fgetxattr, frame, -1, op_errno, NULL, NULL); + shard_common_failure_unwind (GF_FOP_FGETXATTR, frame, -1, op_errno); return 0; } @@ -5176,9 +5756,8 @@ shard_getxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, STACK_WIND (frame, shard_getxattr_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->getxattr, loc, name, xdata); return 0; - out: - SHARD_STACK_UNWIND (getxattr, frame, -1, op_errno, NULL, NULL); + shard_common_failure_unwind (GF_FOP_GETXATTR, frame, -1, op_errno); return 0; } @@ -5197,9 +5776,8 @@ shard_fsetxattr
(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict, FIRST_CHILD(this)->fops->fsetxattr, fd, dict, flags, xdata); return 0; - out: - SHARD_STACK_UNWIND (fsetxattr, frame, -1, op_errno, NULL); + shard_common_failure_unwind (GF_FOP_FSETXATTR, frame, -1, op_errno); return 0; } @@ -5218,9 +5796,8 @@ shard_setxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict, FIRST_CHILD(this)->fops->setxattr, loc, dict, flags, xdata); return 0; - out: - SHARD_STACK_UNWIND (setxattr, frame, -1, op_errno, NULL); + shard_common_failure_unwind (GF_FOP_SETXATTR, frame, -1, op_errno); return 0; } @@ -5335,11 +5912,9 @@ shard_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc, STACK_WIND (frame, shard_common_setattr_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->setattr, loc, stbuf, valid, local->xattr_req); - return 0; - err: - SHARD_STACK_UNWIND (setattr, frame, -1, ENOMEM, NULL, NULL, NULL); + shard_common_failure_unwind (GF_FOP_SETATTR, frame, -1, ENOMEM); return 0; } @@ -5398,9 +5973,8 @@ shard_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd, FIRST_CHILD(this)->fops->fsetattr, fd, stbuf, valid, local->xattr_req); return 0; - err: - SHARD_STACK_UNWIND (fsetattr, frame, -1, ENOMEM, NULL, NULL, NULL); + shard_common_failure_unwind (GF_FOP_FSETATTR, frame, -1, ENOMEM); return 0; } @@ -5502,7 +6076,7 @@ shard_common_inode_write_begin (call_frame_t *frame, xlator_t *this, shard_common_inode_write_post_lookup_handler); return 0; out: - shard_common_inode_write_failure_unwind (fop, frame, -1, ENOMEM); + shard_common_failure_unwind (fop, frame, -1, ENOMEM); return 0; } @@ -5527,9 +6101,8 @@ shard_fallocate (call_frame_t *frame, xlator_t *this, fd_t *fd, shard_common_inode_write_begin (frame, this, GF_FOP_FALLOCATE, fd, NULL, 0, offset, keep_size, len, NULL, xdata); return 0; - out: - SHARD_STACK_UNWIND (fallocate, frame, -1, ENOTSUP, NULL, NULL, NULL); + shard_common_failure_unwind (GF_FOP_FALLOCATE, frame, -1, ENOTSUP); return 0; } @@ -5558,7 +6131,7 
@@ shard_seek (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, /* TBD */ gf_msg (this->name, GF_LOG_INFO, ENOTSUP, SHARD_MSG_FOP_NOT_SUPPORTED, "seek called on %s.", uuid_utoa (fd->inode->gfid)); - SHARD_STACK_UNWIND (seek, frame, -1, ENOTSUP, 0, NULL); + shard_common_failure_unwind (GF_FOP_SEEK, frame, -1, ENOTSUP); return 0; } @@ -5619,6 +6192,7 @@ init (xlator_t *this) goto out; } gf_uuid_parse (SHARD_ROOT_GFID, priv->dot_shard_gfid); + gf_uuid_parse (DOT_SHARD_REMOVE_ME_GFID, priv->dot_shard_rm_gfid); this->private = priv; LOCK_INIT (&priv->lock); diff --git a/xlators/features/shard/src/shard.h b/xlators/features/shard/src/shard.h index 225caa0..1783ff6 100644 --- a/xlators/features/shard/src/shard.h +++ b/xlators/features/shard/src/shard.h @@ -18,6 +18,7 @@ #include "syncop.h" #define GF_SHARD_DIR ".shard" +#define GF_SHARD_REMOVE_ME_DIR ".remove_me" #define SHARD_MIN_BLOCK_SIZE (4 * GF_UNIT_MB) #define SHARD_MAX_BLOCK_SIZE (4 * GF_UNIT_TB) #define SHARD_XATTR_PREFIX "trusted.glusterfs.shard." @@ -55,6 +56,12 @@ #define get_highest_block(off, len, shard_size) \ (((((off)+(len)) == 0)?0:((off)+(len)-1)) / (shard_size)) +int +shard_unlock_inodelk (call_frame_t *frame, xlator_t *this); + +int +shard_unlock_entrylk (call_frame_t *frame, xlator_t *this); + #define SHARD_ENTRY_FOP_CHECK(loc, op_errno, label) do { \ if ((loc->name && !strcmp (GF_SHARD_DIR, loc->name)) && \ (((loc->parent) && \ @@ -79,39 +86,57 @@ } \ } while (0) -#define SHARD_STACK_UNWIND(fop, frame, params ...) do { \ - shard_local_t *__local = NULL; \ - if (frame) { \ - __local = frame->local; \ - frame->local = NULL; \ - } \ - STACK_UNWIND_STRICT (fop, frame, params); \ - if (__local) { \ - shard_local_wipe (__local); \ - mem_put (__local); \ - } \ +#define SHARD_STACK_UNWIND(fop, frame, params ...) 
do { \ + shard_local_t *__local = NULL; \ + if (frame) { \ + __local = frame->local; \ + if (__local && __local->int_inodelk.acquired_lock) \ + shard_unlock_inodelk (frame, frame->this); \ + if (__local && __local->int_entrylk.acquired_lock) \ + shard_unlock_entrylk (frame, frame->this); \ + frame->local = NULL; \ + } \ + STACK_UNWIND_STRICT (fop, frame, params); \ + if (__local) { \ + shard_local_wipe (__local); \ + mem_put (__local); \ + } \ } while (0) +#define SHARD_STACK_DESTROY(frame) \ + do { \ + shard_local_t *__local = NULL; \ + __local = frame->local; \ + frame->local = NULL; \ + STACK_DESTROY (frame->root); \ + if (__local) { \ + shard_local_wipe (__local); \ + mem_put (__local); \ + } \ + } while (0); + -#define SHARD_INODE_CREATE_INIT(this, local, xattr_req, loc, label) do { \ +#define SHARD_INODE_CREATE_INIT(this, block_size, xattr_req, loc, size, \ + block_count, label) do { \ int __ret = -1; \ int64_t *__size_attr = NULL; \ - shard_priv_t *__priv = NULL; \ + uint64_t *__bs = 0; \ \ - __priv = this->private; \ - \ - local->block_size = hton64 (__priv->block_size); \ - __ret = dict_set_static_bin (xattr_req, GF_XATTR_SHARD_BLOCK_SIZE, \ - &local->block_size, \ - sizeof (local->block_size)); \ + __bs = GF_CALLOC (1, sizeof (uint64_t), gf_shard_mt_uint64_t); \ + if (!__bs) \ + goto label; \ + *__bs = hton64 (block_size); \ + __ret = dict_set_bin (xattr_req, GF_XATTR_SHARD_BLOCK_SIZE, __bs, \ + sizeof (*__bs)); \ if (__ret) { \ gf_msg (this->name, GF_LOG_WARNING, 0, \ SHARD_MSG_DICT_SET_FAILED, "Failed to set key: %s " \ - "on path %s", GF_XATTR_SHARD_BLOCK_SIZE, loc->path); \ + "on path %s", GF_XATTR_SHARD_BLOCK_SIZE, (loc)->path);\ + GF_FREE (__bs); \ goto label; \ } \ \ - __ret = shard_set_size_attrs (0, 0, &__size_attr); \ + __ret = shard_set_size_attrs (size, block_count, &__size_attr); \ if (__ret) \ goto label; \ \ @@ -120,7 +145,7 @@ if (__ret) { \ gf_msg (this->name, GF_LOG_WARNING, 0, \ SHARD_MSG_DICT_SET_FAILED, "Failed to set key: %s " \ - 
"on path %s", GF_XATTR_SHARD_FILE_SIZE, loc->path); \ + "on path %s", GF_XATTR_SHARD_FILE_SIZE, (loc)->path); \ GF_FREE (__size_attr); \ goto label; \ } \ @@ -172,21 +197,34 @@ } \ } while (0) +/* rm = "remove me" */ typedef struct shard_priv { uint64_t block_size; uuid_t dot_shard_gfid; + uuid_t dot_shard_rm_gfid; inode_t *dot_shard_inode; + inode_t *dot_shard_rm_inode; gf_lock_t lock; int inode_count; struct list_head ilist_head; } shard_priv_t; typedef struct { - loc_t *loc; - short type; + loc_t loc; char *domain; -} shard_lock_t; + struct gf_flock flock; + gf_boolean_t acquired_lock; +} shard_inodelk_t; + +typedef struct { + loc_t loc; + char *domain; + char *basename; + entrylk_cmd cmd; + entrylk_type type; + gf_boolean_t acquired_lock; +} shard_entrylk_t; typedef int32_t (*shard_post_fop_handler_t) (call_frame_t *frame, xlator_t *this); @@ -200,6 +238,7 @@ typedef int32_t (*shard_post_mknod_fop_handler_t) (call_frame_t *frame, typedef int32_t (*shard_post_update_size_fop_handler_t) (call_frame_t *frame, xlator_t *this); + typedef struct shard_local { int op_ret; int op_errno; @@ -227,6 +266,7 @@ typedef struct shard_local { int delta_blocks; loc_t loc; loc_t dot_shard_loc; + loc_t dot_shard_rm_loc; loc_t loc2; loc_t tmp_loc; fd_t *fd; @@ -251,16 +291,18 @@ typedef struct shard_local { shard_post_resolve_fop_handler_t post_res_handler; shard_post_mknod_fop_handler_t post_mknod_handler; shard_post_update_size_fop_handler_t post_update_size_handler; - struct { - int lock_count; - fop_inodelk_cbk_t inodelk_cbk; - shard_lock_t *shard_lock; - } lock; + shard_inodelk_t int_inodelk; + shard_entrylk_t int_entrylk; inode_t *resolver_base_inode; gf_boolean_t first_lookup_done; syncbarrier_t barrier; gf_boolean_t lookup_shards_barriered; gf_boolean_t unlink_shards_barriered; + gf_boolean_t resolve_not; + loc_t newloc; + call_frame_t *main_frame; + call_frame_t *inodelk_frame; + call_frame_t *entrylk_frame; } shard_local_t; typedef struct shard_inode_ctx { @@ -284,6 
+326,7 @@ typedef struct shard_inode_ctx { typedef enum { SHARD_INTERNAL_DIR_DOT_SHARD = 1, + SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME, } shard_internal_dir_type_t; #endif /* __SHARD_H__ */ -- 1.8.3.1