From 1b86a4bda540ff4cf307c7f38d3041318636ecb7 Mon Sep 17 00:00:00 2001
From: Vinayakswami Hariharmath <vharihar@redhat.com>
Date: Thu, 6 Aug 2020 14:39:59 +0530
Subject: [PATCH 569/584] features/shard: optimization over shard lookup in
 case of prealloc

Assume that we are preallocating a VM of size 1TB with a shard
block size of 64MB; there will then be ~16k shards.

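As a back-of-the-envelope check of that figure (a hypothetical
standalone calculation, not code from this patch):

    /* 1TB of data split into 64MB shard blocks */
    uint64_t file_size = 1ULL << 40;   /* 1TB  */
    uint64_t block_size = 64ULL << 20; /* 64MB */
    uint64_t shards = (file_size + block_size - 1) / block_size; /* 16384 */
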
This creation happens in 2 steps in the shard_fallocate() path, i.e.

1. lookup for the shards, if any are already present, and
2. mknod over those shards that do not exist.

But in the case of fresh creation, we don't have to look up the
shards which are not present, as the file size will be 0. Through
this, we can save a lookup on every shard which is not present.
This optimization is quite useful when preallocating a big VM.

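In rough pseudo-C, the fast path this takes (a condensed sketch of
the shard_common_resolve_shards() hunk below, error handling
elided):

    if (local->fop == GF_FOP_FALLOCATE && !local->prebuf.ia_size) {
        /* Fresh file: no shards exist yet, so skip lookups and go
         * straight to creating (mknod-ing) all the shards. */
        local->create_count = local->last_block;
        shard_common_inode_write_post_lookup_shards_handler(frame, this);
        return 0;
    }
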
Also, if the file is already present and the call is to extend it
to a bigger size, then we need not look up the non-existent shards.
Just look up the preexisting shards, populate the inodes and issue
mknod for the extended size.

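For example (hypothetical numbers, assuming a 64MB shard block
size as above): extending a 256MB preallocated file to 512MB gives
last_block = (512MB - 1) / 64MB = 7, so by the formula in the hunk
below, create_count = 7 - ((256MB - 1) / 64MB) = 7 - 3 = 4. Only
the 3 preexisting shards (plus the base file) are looked up; the 4
new shards are created directly.
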
Backport of:
> Upstream-patch: https://review.gluster.org/#/c/glusterfs/+/24813/
> Fixes: #1425
> Change-Id: I60036fe8302c696e0ca80ff11ab0ef5bcdbd7880
> Signed-off-by: Vinayakswami Hariharmath <vharihar@redhat.com>

BUG: 1925425
Change-Id: I60036fe8302c696e0ca80ff11ab0ef5bcdbd7880
Signed-off-by: Vinayakswami Hariharmath <vharihar@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/244963
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
---
 tests/bugs/shard/issue-1425.t      | 45 +++++++++++++++++++++++++++++++++++++
 xlators/features/shard/src/shard.c | 46 ++++++++++++++++++++++++++++++++------
 2 files changed, 84 insertions(+), 7 deletions(-)
 create mode 100644 tests/bugs/shard/issue-1425.t

diff --git a/tests/bugs/shard/issue-1425.t b/tests/bugs/shard/issue-1425.t
new file mode 100644
index 0000000..bbe82c0
--- /dev/null
+++ b/tests/bugs/shard/issue-1425.t
@@ -0,0 +1,45 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+FILE_COUNT_TIME=5
+
+function get_file_count {
+    ls $1* | wc -l
+}
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 $H0:$B0/${V0}0
+TEST $CLI volume set $V0 features.shard on
+TEST $CLI volume set $V0 features.shard-block-size 4MB
+TEST $CLI volume start $V0
+TEST $CLI volume profile $V0 start
+
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0
+
+TEST fallocate -l 20M $M0/foo
+gfid_new=$(get_gfid_string $M0/foo)
+
+# Check for the base shard
+TEST stat $M0/foo
+TEST stat $B0/${V0}0/foo
+
+# There should be 4 associated shards
+EXPECT_WITHIN $FILE_COUNT_TIME 4 get_file_count $B0/${V0}0/.shard/$gfid_new
+
+# There should be 1+4 shards, and we expect 4 fewer lookups than on a build without this patch
+EXPECT "21" echo `$CLI volume profile $V0 info incremental | grep -w LOOKUP | awk '{print $8}'`
+
+# Delete the base shard and check shards get cleaned up
+TEST unlink $M0/foo
+
+TEST ! stat $M0/foo
+TEST ! stat $B0/${V0}0/foo
+
+# There should be no shards now
+EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}0/.shard/$gfid_new
+cleanup
diff --git a/xlators/features/shard/src/shard.c b/xlators/features/shard/src/shard.c
index 2ba4528..a6ad1b8 100644
--- a/xlators/features/shard/src/shard.c
+++ b/xlators/features/shard/src/shard.c
@@ -995,6 +995,10 @@ shard_initiate_evicted_inode_fsync(xlator_t *this, inode_t *inode)
 }
 
 int
+shard_common_inode_write_post_lookup_shards_handler(call_frame_t *frame,
+                                                    xlator_t *this);
+
+int
 shard_common_resolve_shards(call_frame_t *frame, xlator_t *this,
                             shard_post_resolve_fop_handler_t post_res_handler)
 {
@@ -1011,21 +1015,47 @@ shard_common_resolve_shards(call_frame_t *frame, xlator_t *this,
     inode_t *fsync_inode = NULL;
     shard_priv_t *priv = NULL;
     shard_local_t *local = NULL;
+    uint64_t resolve_count = 0;
 
     priv = this->private;
     local = frame->local;
     local->call_count = 0;
     shard_idx_iter = local->first_block;
     res_inode = local->resolver_base_inode;
+
+    if ((local->op_ret < 0) || (local->resolve_not))
+        goto out;
+
+    /* If this prealloc FOP is for fresh file creation, then the size of the
+     * file will be 0. Then there will be no shards associated with this file.
+     * So we can skip the lookup process for the shards which do not exist
+     * and directly issue mknod to create shards.
+     *
+     * In case the prealloc FOP is to extend the preallocated file to a bigger
+     * size, then just look up and populate inodes of existing shards and
+     * update the create count.
+     */
+    if (local->fop == GF_FOP_FALLOCATE) {
+        if (!local->prebuf.ia_size) {
+            local->inode_list[0] = inode_ref(res_inode);
+            local->create_count = local->last_block;
+            shard_common_inode_write_post_lookup_shards_handler(frame, this);
+            return 0;
+        }
+        if (local->prebuf.ia_size < local->total_size)
+            local->create_count = local->last_block -
+                                  ((local->prebuf.ia_size - 1) /
+                                   local->block_size);
+    }
+
+    resolve_count = local->last_block - local->create_count;
+
     if (res_inode)
         gf_uuid_copy(gfid, res_inode->gfid);
     else
         gf_uuid_copy(gfid, local->base_gfid);
 
-    if ((local->op_ret < 0) || (local->resolve_not))
-        goto out;
-
-    while (shard_idx_iter <= local->last_block) {
+    while (shard_idx_iter <= resolve_count) {
         i++;
         if (shard_idx_iter == 0) {
             local->inode_list[i] = inode_ref(res_inode);
@@ -2434,7 +2464,7 @@ shard_common_lookup_shards(call_frame_t *frame, xlator_t *this, inode_t *inode,
     int count = 0;
     int call_count = 0;
     int32_t shard_idx_iter = 0;
-    int last_block = 0;
+    int lookup_count = 0;
     char path[PATH_MAX] = {
         0,
     };
@@ -2454,7 +2484,7 @@ shard_common_lookup_shards(call_frame_t *frame, xlator_t *this, inode_t *inode,
     local = frame->local;
     count = call_count = local->call_count;
     shard_idx_iter = local->first_block;
-    last_block = local->last_block;
+    lookup_count = local->last_block - local->create_count;
     local->pls_fop_handler = handler;
     if (local->lookup_shards_barriered)
         local->barrier.waitfor = local->call_count;
@@ -2464,7 +2494,7 @@ shard_common_lookup_shards(call_frame_t *frame, xlator_t *this, inode_t *inode,
     else
         gf_uuid_copy(gfid, local->base_gfid);
 
-    while (shard_idx_iter <= last_block) {
+    while (shard_idx_iter <= lookup_count) {
         if (local->inode_list[i]) {
             i++;
             shard_idx_iter++;
@@ -5651,6 +5681,8 @@ shard_common_inode_write_post_resolve_handler(call_frame_t *frame,
         shard_common_lookup_shards(
             frame, this, local->resolver_base_inode,
             shard_common_inode_write_post_lookup_shards_handler);
+    } else if (local->create_count) {
+        shard_common_inode_write_post_lookup_shards_handler(frame, this);
     } else {
         shard_common_inode_write_do(frame, this);
     }
-- 
1.8.3.1