From 1b86a4bda540ff4cf307c7f38d3041318636ecb7 Mon Sep 17 00:00:00 2001 From: Vinayakswami Hariharmath Date: Thu, 6 Aug 2020 14:39:59 +0530 Subject: [PATCH 569/584] features/shard: optimization over shard lookup in case of prealloc Assume that we are preallocating a VM of size 1TB with a shard block size of 64MB then there will be ~16k shards. This creation happens in 2 steps shard_fallocate() path i.e 1. lookup for the shards if any already present and 2. mknod over those shards do not exist. But in case of fresh creation, we dont have to lookup for all shards which are not present as the the file size will be 0. Through this, we can save lookup on all shards which are not present. This optimization is quite useful in the case of preallocating big vm. Also if the file is already present and the call is to extend it to bigger size then we need not to lookup for non- existent shards. Just lookup preexisting shards, populate the inodes and issue mknod on extended size. Backport of: > Upstream-patch: https://review.gluster.org/#/c/glusterfs/+/24813/ > Fixes: #1425 > Change-Id: I60036fe8302c696e0ca80ff11ab0ef5bcdbd7880 > Signed-off-by: Vinayakswami Hariharmath BUG: 1925425 Change-Id: I60036fe8302c696e0ca80ff11ab0ef5bcdbd7880 Signed-off-by: Vinayakswami Hariharmath Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/244963 Tested-by: RHGS Build Bot Reviewed-by: Sunil Kumar Heggodu Gopala Acharya --- tests/bugs/shard/issue-1425.t | 45 +++++++++++++++++++++++++++++++++++++ xlators/features/shard/src/shard.c | 46 ++++++++++++++++++++++++++++++++------ 2 files changed, 84 insertions(+), 7 deletions(-) create mode 100644 tests/bugs/shard/issue-1425.t diff --git a/tests/bugs/shard/issue-1425.t b/tests/bugs/shard/issue-1425.t new file mode 100644 index 0000000..bbe82c0 --- /dev/null +++ b/tests/bugs/shard/issue-1425.t @@ -0,0 +1,45 @@ +#!/bin/bash + +. $(dirname $0)/../../include.rc +. $(dirname $0)/../../volume.rc + +cleanup; + +FILE_COUNT_TIME=5 + +function get_file_count { + ls $1* | wc -l +} + +TEST glusterd +TEST pidof glusterd +TEST $CLI volume create $V0 $H0:$B0/${V0}0 +TEST $CLI volume set $V0 features.shard on +TEST $CLI volume set $V0 features.shard-block-size 4MB +TEST $CLI volume start $V0 +TEST $CLI volume profile $V0 start + +TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0 + +TEST fallocate -l 20M $M0/foo +gfid_new=$(get_gfid_string $M0/foo) + +# Check for the base shard +TEST stat $M0/foo +TEST stat $B0/${V0}0/foo + +# There should be 4 associated shards +EXPECT_WITHIN $FILE_COUNT_TIME 4 get_file_count $B0/${V0}0/.shard/$gfid_new + +# There should be 1+4 shards and we expect 4 lookups less than on the build without this patch +EXPECT "21" echo `$CLI volume profile $V0 info incremental | grep -w LOOKUP | awk '{print $8}'` + +# Delete the base shard and check shards get cleaned up +TEST unlink $M0/foo + +TEST ! stat $M0/foo +TEST ! stat $B0/${V0}0/foo + +# There should be no shards now +EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}0/.shard/$gfid_new +cleanup diff --git a/xlators/features/shard/src/shard.c b/xlators/features/shard/src/shard.c index 2ba4528..a6ad1b8 100644 --- a/xlators/features/shard/src/shard.c +++ b/xlators/features/shard/src/shard.c @@ -995,6 +995,10 @@ shard_initiate_evicted_inode_fsync(xlator_t *this, inode_t *inode) } int +shard_common_inode_write_post_lookup_shards_handler(call_frame_t *frame, + xlator_t *this); + +int shard_common_resolve_shards(call_frame_t *frame, xlator_t *this, shard_post_resolve_fop_handler_t post_res_handler) { @@ -1011,21 +1015,47 @@ shard_common_resolve_shards(call_frame_t *frame, xlator_t *this, inode_t *fsync_inode = NULL; shard_priv_t *priv = NULL; shard_local_t *local = NULL; + uint64_t resolve_count = 0; priv = this->private; local = frame->local; local->call_count = 0; shard_idx_iter = local->first_block; res_inode = local->resolver_base_inode; + + if ((local->op_ret < 0) || (local->resolve_not)) + goto out; + + /* If this prealloc FOP is for fresh file creation, then the size of the + * file will be 0. Then there will be no shards associated with this file. + * So we can skip the lookup process for the shards which do not exists + * and directly issue mknod to crete shards. + * + * In case the prealloc fop is to extend the preallocated file to bigger + * size then just lookup and populate inodes of existing shards and + * update the create count + */ + if (local->fop == GF_FOP_FALLOCATE) { + if (!local->prebuf.ia_size) { + local->inode_list[0] = inode_ref(res_inode); + local->create_count = local->last_block; + shard_common_inode_write_post_lookup_shards_handler(frame, this); + return 0; + } + if (local->prebuf.ia_size < local->total_size) + local->create_count = local->last_block - + ((local->prebuf.ia_size - 1) / + local->block_size); + } + + resolve_count = local->last_block - local->create_count; + if (res_inode) gf_uuid_copy(gfid, res_inode->gfid); else gf_uuid_copy(gfid, local->base_gfid); - if ((local->op_ret < 0) || (local->resolve_not)) - goto out; - - while (shard_idx_iter <= local->last_block) { + while (shard_idx_iter <= resolve_count) { i++; if (shard_idx_iter == 0) { local->inode_list[i] = inode_ref(res_inode); @@ -2434,7 +2464,7 @@ shard_common_lookup_shards(call_frame_t *frame, xlator_t *this, inode_t *inode, int count = 0; int call_count = 0; int32_t shard_idx_iter = 0; - int last_block = 0; + int lookup_count = 0; char path[PATH_MAX] = { 0, }; @@ -2454,7 +2484,7 @@ shard_common_lookup_shards(call_frame_t *frame, xlator_t *this, inode_t *inode, local = frame->local; count = call_count = local->call_count; shard_idx_iter = local->first_block; - last_block = local->last_block; + lookup_count = local->last_block - local->create_count; local->pls_fop_handler = handler; if (local->lookup_shards_barriered) local->barrier.waitfor = local->call_count; @@ -2464,7 +2494,7 @@ shard_common_lookup_shards(call_frame_t *frame, xlator_t *this, inode_t *inode, else gf_uuid_copy(gfid, local->base_gfid); - while (shard_idx_iter <= last_block) { + while (shard_idx_iter <= lookup_count) { if (local->inode_list[i]) { i++; shard_idx_iter++; @@ -5651,6 +5681,8 @@ shard_common_inode_write_post_resolve_handler(call_frame_t *frame, shard_common_lookup_shards( frame, this, local->resolver_base_inode, shard_common_inode_write_post_lookup_shards_handler); + } else if (local->create_count) { + shard_common_inode_write_post_lookup_shards_handler(frame, this); } else { shard_common_inode_write_do(frame, this); } -- 1.8.3.1