d2787b
From 1b86a4bda540ff4cf307c7f38d3041318636ecb7 Mon Sep 17 00:00:00 2001
d2787b
From: Vinayakswami Hariharmath <vharihar@redhat.com>
d2787b
Date: Thu, 6 Aug 2020 14:39:59 +0530
d2787b
Subject: [PATCH 569/584] features/shard: optimization over shard lookup in
d2787b
 case of prealloc
d2787b
d2787b
Assume that we are preallocating a VM of size 1TB with a shard
d2787b
block size of 64MB then there will be ~16k shards.
d2787b
d2787b
This creation happens in 2 steps in the shard_fallocate() path, i.e.
d2787b
d2787b
1. lookup for the shards if any already present and
d2787b
2. mknod over those shards that do not exist.
d2787b
d2787b
But in case of fresh creation, we don't have to look up all
d2787b
shards which are not present as the file size will be 0.
d2787b
Through this, we can save lookup on all shards which are not
d2787b
present. This optimization is quite useful in the case of
d2787b
preallocating a big VM.
d2787b
d2787b
Also if the file is already present and the call is to
d2787b
extend it to a bigger size then we need not look up non-
d2787b
existent shards. Just lookup preexisting shards, populate
d2787b
the inodes and issue mknod on extended size.
d2787b
d2787b
Backport of:
d2787b
> Upstream-patch: https://review.gluster.org/#/c/glusterfs/+/24813/
d2787b
> Fixes: #1425
d2787b
> Change-Id: I60036fe8302c696e0ca80ff11ab0ef5bcdbd7880
d2787b
> Signed-off-by: Vinayakswami Hariharmath <vharihar@redhat.com>
d2787b
d2787b
BUG: 1925425
d2787b
Change-Id: I60036fe8302c696e0ca80ff11ab0ef5bcdbd7880
d2787b
Signed-off-by: Vinayakswami Hariharmath <vharihar@redhat.com>
d2787b
Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/244963
d2787b
Tested-by: RHGS Build Bot <nigelb@redhat.com>
d2787b
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
d2787b
---
d2787b
 tests/bugs/shard/issue-1425.t      | 45 +++++++++++++++++++++++++++++++++++++
d2787b
 xlators/features/shard/src/shard.c | 46 ++++++++++++++++++++++++++++++++------
d2787b
 2 files changed, 84 insertions(+), 7 deletions(-)
d2787b
 create mode 100644 tests/bugs/shard/issue-1425.t
d2787b
d2787b
diff --git a/tests/bugs/shard/issue-1425.t b/tests/bugs/shard/issue-1425.t
d2787b
new file mode 100644
d2787b
index 0000000..bbe82c0
d2787b
--- /dev/null
d2787b
+++ b/tests/bugs/shard/issue-1425.t
d2787b
@@ -0,0 +1,45 @@
d2787b
+#!/bin/bash
d2787b
+
d2787b
+. $(dirname $0)/../../include.rc
d2787b
+. $(dirname $0)/../../volume.rc
d2787b
+
d2787b
+cleanup;
d2787b
+
d2787b
+FILE_COUNT_TIME=5
d2787b
+
d2787b
+function get_file_count {
d2787b
+    ls $1* | wc -l
d2787b
+}
d2787b
+
d2787b
+TEST glusterd
d2787b
+TEST pidof glusterd
d2787b
+TEST $CLI volume create $V0 $H0:$B0/${V0}0
d2787b
+TEST $CLI volume set $V0 features.shard on
d2787b
+TEST $CLI volume set $V0 features.shard-block-size 4MB
d2787b
+TEST $CLI volume start $V0
d2787b
+TEST $CLI volume profile $V0 start
d2787b
+
d2787b
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0
d2787b
+
d2787b
+TEST fallocate -l 20M $M0/foo
d2787b
+gfid_new=$(get_gfid_string $M0/foo)
d2787b
+
d2787b
+# Check for the base shard
d2787b
+TEST stat $M0/foo
d2787b
+TEST stat $B0/${V0}0/foo
d2787b
+
d2787b
+# There should be 4 associated shards
d2787b
+EXPECT_WITHIN $FILE_COUNT_TIME 4 get_file_count $B0/${V0}0/.shard/$gfid_new
d2787b
+
d2787b
+# There should be 1+4 shards and we expect 4 lookups less than on the build without this patch
d2787b
+EXPECT "21" echo `$CLI volume profile $V0 info incremental | grep -w LOOKUP | awk '{print $8}'`
d2787b
+
d2787b
+# Delete the base shard and check shards get cleaned up
d2787b
+TEST unlink $M0/foo
d2787b
+
d2787b
+TEST ! stat $M0/foo
d2787b
+TEST ! stat $B0/${V0}0/foo
d2787b
+
d2787b
+# There should be no shards now
d2787b
+EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}0/.shard/$gfid_new
d2787b
+cleanup
d2787b
diff --git a/xlators/features/shard/src/shard.c b/xlators/features/shard/src/shard.c
d2787b
index 2ba4528..a6ad1b8 100644
d2787b
--- a/xlators/features/shard/src/shard.c
d2787b
+++ b/xlators/features/shard/src/shard.c
d2787b
@@ -995,6 +995,10 @@ shard_initiate_evicted_inode_fsync(xlator_t *this, inode_t *inode)
d2787b
 }
d2787b
 
d2787b
 int
d2787b
+shard_common_inode_write_post_lookup_shards_handler(call_frame_t *frame,
d2787b
+                                                    xlator_t *this);
d2787b
+
d2787b
+int
d2787b
 shard_common_resolve_shards(call_frame_t *frame, xlator_t *this,
d2787b
                             shard_post_resolve_fop_handler_t post_res_handler)
d2787b
 {
d2787b
@@ -1011,21 +1015,47 @@ shard_common_resolve_shards(call_frame_t *frame, xlator_t *this,
d2787b
     inode_t *fsync_inode = NULL;
d2787b
     shard_priv_t *priv = NULL;
d2787b
     shard_local_t *local = NULL;
d2787b
+    uint64_t resolve_count = 0;
d2787b
 
d2787b
     priv = this->private;
d2787b
     local = frame->local;
d2787b
     local->call_count = 0;
d2787b
     shard_idx_iter = local->first_block;
d2787b
     res_inode = local->resolver_base_inode;
d2787b
+
d2787b
+    if ((local->op_ret < 0) || (local->resolve_not))
d2787b
+        goto out;
d2787b
+
d2787b
+    /* If this prealloc FOP is for fresh file creation, then the size of the
d2787b
+     * file will be 0. Then there will be no shards associated with this file.
d2787b
+     * So we can skip the lookup process for the shards which do not exist
d2787b
+     * and directly issue mknod to create shards.
d2787b
+     *
d2787b
+     * In case the prealloc fop is to extend the preallocated file to bigger
d2787b
+     * size then just lookup and populate inodes of existing shards and
d2787b
+     * update the create count
d2787b
+     */
d2787b
+    if (local->fop == GF_FOP_FALLOCATE) {
d2787b
+        if (!local->prebuf.ia_size) {
d2787b
+            local->inode_list[0] = inode_ref(res_inode);
d2787b
+            local->create_count = local->last_block;
d2787b
+            shard_common_inode_write_post_lookup_shards_handler(frame, this);
d2787b
+            return 0;
d2787b
+        }
d2787b
+        if (local->prebuf.ia_size < local->total_size)
d2787b
+            local->create_count = local->last_block -
d2787b
+                                  ((local->prebuf.ia_size - 1) /
d2787b
+                                   local->block_size);
d2787b
+    }
d2787b
+
d2787b
+    resolve_count = local->last_block - local->create_count;
d2787b
+
d2787b
     if (res_inode)
d2787b
         gf_uuid_copy(gfid, res_inode->gfid);
d2787b
     else
d2787b
         gf_uuid_copy(gfid, local->base_gfid);
d2787b
 
d2787b
-    if ((local->op_ret < 0) || (local->resolve_not))
d2787b
-        goto out;
d2787b
-
d2787b
-    while (shard_idx_iter <= local->last_block) {
d2787b
+    while (shard_idx_iter <= resolve_count) {
d2787b
         i++;
d2787b
         if (shard_idx_iter == 0) {
d2787b
             local->inode_list[i] = inode_ref(res_inode);
d2787b
@@ -2434,7 +2464,7 @@ shard_common_lookup_shards(call_frame_t *frame, xlator_t *this, inode_t *inode,
d2787b
     int count = 0;
d2787b
     int call_count = 0;
d2787b
     int32_t shard_idx_iter = 0;
d2787b
-    int last_block = 0;
d2787b
+    int lookup_count = 0;
d2787b
     char path[PATH_MAX] = {
d2787b
         0,
d2787b
     };
d2787b
@@ -2454,7 +2484,7 @@ shard_common_lookup_shards(call_frame_t *frame, xlator_t *this, inode_t *inode,
d2787b
     local = frame->local;
d2787b
     count = call_count = local->call_count;
d2787b
     shard_idx_iter = local->first_block;
d2787b
-    last_block = local->last_block;
d2787b
+    lookup_count = local->last_block - local->create_count;
d2787b
     local->pls_fop_handler = handler;
d2787b
     if (local->lookup_shards_barriered)
d2787b
         local->barrier.waitfor = local->call_count;
d2787b
@@ -2464,7 +2494,7 @@ shard_common_lookup_shards(call_frame_t *frame, xlator_t *this, inode_t *inode,
d2787b
     else
d2787b
         gf_uuid_copy(gfid, local->base_gfid);
d2787b
 
d2787b
-    while (shard_idx_iter <= last_block) {
d2787b
+    while (shard_idx_iter <= lookup_count) {
d2787b
         if (local->inode_list[i]) {
d2787b
             i++;
d2787b
             shard_idx_iter++;
d2787b
@@ -5651,6 +5681,8 @@ shard_common_inode_write_post_resolve_handler(call_frame_t *frame,
d2787b
         shard_common_lookup_shards(
d2787b
             frame, this, local->resolver_base_inode,
d2787b
             shard_common_inode_write_post_lookup_shards_handler);
d2787b
+    } else if (local->create_count) {
d2787b
+        shard_common_inode_write_post_lookup_shards_handler(frame, this);
d2787b
     } else {
d2787b
         shard_common_inode_write_do(frame, this);
d2787b
     }
d2787b
-- 
d2787b
1.8.3.1
d2787b