c460ee
From 1b86a4bda540ff4cf307c7f38d3041318636ecb7 Mon Sep 17 00:00:00 2001
c460ee
From: Vinayakswami Hariharmath <vharihar@redhat.com>
c460ee
Date: Thu, 6 Aug 2020 14:39:59 +0530
c460ee
Subject: [PATCH 569/584] features/shard: optimization over shard lookup in
c460ee
 case of prealloc
c460ee
c460ee
Assume that we are preallocating a VM of size 1TB with a shard
c460ee
block size of 64MB then there will be ~16k shards.
c460ee
c460ee
This creation happens in 2 steps in the shard_fallocate() path, i.e.
c460ee
c460ee
1. lookup of the shards, if any are already present, and
c460ee
2. mknod over those shards that do not exist.
c460ee
c460ee
But in case of fresh creation, we don't have to look up all
c460ee
shards which are not present as the file size will be 0.
c460ee
Through this, we can save lookup on all shards which are not
c460ee
present. This optimization is quite useful in the case of
c460ee
preallocating big vm.
c460ee
c460ee
Also if the file is already present and the call is to
c460ee
extend it to a bigger size, then we need not look up non-
c460ee
existent shards. Just lookup preexisting shards, populate
c460ee
the inodes and issue mknod on extended size.
c460ee
c460ee
Backport of:
c460ee
> Upstream-patch: https://review.gluster.org/#/c/glusterfs/+/24813/
c460ee
> Fixes: #1425
c460ee
> Change-Id: I60036fe8302c696e0ca80ff11ab0ef5bcdbd7880
c460ee
> Signed-off-by: Vinayakswami Hariharmath <vharihar@redhat.com>
c460ee
c460ee
BUG: 1925425
c460ee
Change-Id: I60036fe8302c696e0ca80ff11ab0ef5bcdbd7880
c460ee
Signed-off-by: Vinayakswami Hariharmath <vharihar@redhat.com>
c460ee
Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/244963
c460ee
Tested-by: RHGS Build Bot <nigelb@redhat.com>
c460ee
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
c460ee
---
c460ee
 tests/bugs/shard/issue-1425.t      | 45 +++++++++++++++++++++++++++++++++++++
c460ee
 xlators/features/shard/src/shard.c | 46 ++++++++++++++++++++++++++++++++------
c460ee
 2 files changed, 84 insertions(+), 7 deletions(-)
c460ee
 create mode 100644 tests/bugs/shard/issue-1425.t
c460ee
c460ee
diff --git a/tests/bugs/shard/issue-1425.t b/tests/bugs/shard/issue-1425.t
c460ee
new file mode 100644
c460ee
index 0000000..bbe82c0
c460ee
--- /dev/null
c460ee
+++ b/tests/bugs/shard/issue-1425.t
c460ee
@@ -0,0 +1,45 @@
c460ee
+#!/bin/bash
c460ee
+
c460ee
+. $(dirname $0)/../../include.rc
c460ee
+. $(dirname $0)/../../volume.rc
c460ee
+
c460ee
+cleanup;
c460ee
+
c460ee
+FILE_COUNT_TIME=5
c460ee
+
c460ee
+function get_file_count {
c460ee
+    ls $1* | wc -l
c460ee
+}
c460ee
+
c460ee
+TEST glusterd
c460ee
+TEST pidof glusterd
c460ee
+TEST $CLI volume create $V0 $H0:$B0/${V0}0
c460ee
+TEST $CLI volume set $V0 features.shard on
c460ee
+TEST $CLI volume set $V0 features.shard-block-size 4MB
c460ee
+TEST $CLI volume start $V0
c460ee
+TEST $CLI volume profile $V0 start
c460ee
+
c460ee
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0
c460ee
+
c460ee
+TEST fallocate -l 20M $M0/foo
c460ee
+gfid_new=$(get_gfid_string $M0/foo)
c460ee
+
c460ee
+# Check for the base shard
c460ee
+TEST stat $M0/foo
c460ee
+TEST stat $B0/${V0}0/foo
c460ee
+
c460ee
+# There should be 4 associated shards
c460ee
+EXPECT_WITHIN $FILE_COUNT_TIME 4 get_file_count $B0/${V0}0/.shard/$gfid_new
c460ee
+
c460ee
+# There should be 1+4 shards and we expect 4 fewer lookups than on a build without this patch
c460ee
+EXPECT "21" echo `$CLI volume profile $V0 info incremental | grep -w LOOKUP | awk '{print $8}'`
c460ee
+
c460ee
+# Delete the base shard and check shards get cleaned up
c460ee
+TEST unlink $M0/foo
c460ee
+
c460ee
+TEST ! stat $M0/foo
c460ee
+TEST ! stat $B0/${V0}0/foo
c460ee
+
c460ee
+# There should be no shards now
c460ee
+EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}0/.shard/$gfid_new
c460ee
+cleanup
c460ee
diff --git a/xlators/features/shard/src/shard.c b/xlators/features/shard/src/shard.c
c460ee
index 2ba4528..a6ad1b8 100644
c460ee
--- a/xlators/features/shard/src/shard.c
c460ee
+++ b/xlators/features/shard/src/shard.c
c460ee
@@ -995,6 +995,10 @@ shard_initiate_evicted_inode_fsync(xlator_t *this, inode_t *inode)
c460ee
 }
c460ee
 
c460ee
 int
c460ee
+shard_common_inode_write_post_lookup_shards_handler(call_frame_t *frame,
c460ee
+                                                    xlator_t *this);
c460ee
+
c460ee
+int
c460ee
 shard_common_resolve_shards(call_frame_t *frame, xlator_t *this,
c460ee
                             shard_post_resolve_fop_handler_t post_res_handler)
c460ee
 {
c460ee
@@ -1011,21 +1015,47 @@ shard_common_resolve_shards(call_frame_t *frame, xlator_t *this,
c460ee
     inode_t *fsync_inode = NULL;
c460ee
     shard_priv_t *priv = NULL;
c460ee
     shard_local_t *local = NULL;
c460ee
+    uint64_t resolve_count = 0;
c460ee
 
c460ee
     priv = this->private;
c460ee
     local = frame->local;
c460ee
     local->call_count = 0;
c460ee
     shard_idx_iter = local->first_block;
c460ee
     res_inode = local->resolver_base_inode;
c460ee
+
c460ee
+    if ((local->op_ret < 0) || (local->resolve_not))
c460ee
+        goto out;
c460ee
+
c460ee
+    /* If this prealloc FOP is for fresh file creation, then the size of the
c460ee
+     * file will be 0. Then there will be no shards associated with this file.
c460ee
+     * So we can skip the lookup process for the shards which do not exist
c460ee
+     * and directly issue mknod to create shards.
c460ee
+     *
c460ee
+     * In case the prealloc fop is to extend the preallocated file to bigger
c460ee
+     * size then just lookup and populate inodes of existing shards and
c460ee
+     * update the create count
c460ee
+     */
c460ee
+    if (local->fop == GF_FOP_FALLOCATE) {
c460ee
+        if (!local->prebuf.ia_size) {
c460ee
+            local->inode_list[0] = inode_ref(res_inode);
c460ee
+            local->create_count = local->last_block;
c460ee
+            shard_common_inode_write_post_lookup_shards_handler(frame, this);
c460ee
+            return 0;
c460ee
+        }
c460ee
+        if (local->prebuf.ia_size < local->total_size)
c460ee
+            local->create_count = local->last_block -
c460ee
+                                  ((local->prebuf.ia_size - 1) /
c460ee
+                                   local->block_size);
c460ee
+    }
c460ee
+
c460ee
+    resolve_count = local->last_block - local->create_count;
c460ee
+
c460ee
     if (res_inode)
c460ee
         gf_uuid_copy(gfid, res_inode->gfid);
c460ee
     else
c460ee
         gf_uuid_copy(gfid, local->base_gfid);
c460ee
 
c460ee
-    if ((local->op_ret < 0) || (local->resolve_not))
c460ee
-        goto out;
c460ee
-
c460ee
-    while (shard_idx_iter <= local->last_block) {
c460ee
+    while (shard_idx_iter <= resolve_count) {
c460ee
         i++;
c460ee
         if (shard_idx_iter == 0) {
c460ee
             local->inode_list[i] = inode_ref(res_inode);
c460ee
@@ -2434,7 +2464,7 @@ shard_common_lookup_shards(call_frame_t *frame, xlator_t *this, inode_t *inode,
c460ee
     int count = 0;
c460ee
     int call_count = 0;
c460ee
     int32_t shard_idx_iter = 0;
c460ee
-    int last_block = 0;
c460ee
+    int lookup_count = 0;
c460ee
     char path[PATH_MAX] = {
c460ee
         0,
c460ee
     };
c460ee
@@ -2454,7 +2484,7 @@ shard_common_lookup_shards(call_frame_t *frame, xlator_t *this, inode_t *inode,
c460ee
     local = frame->local;
c460ee
     count = call_count = local->call_count;
c460ee
     shard_idx_iter = local->first_block;
c460ee
-    last_block = local->last_block;
c460ee
+    lookup_count = local->last_block - local->create_count;
c460ee
     local->pls_fop_handler = handler;
c460ee
     if (local->lookup_shards_barriered)
c460ee
         local->barrier.waitfor = local->call_count;
c460ee
@@ -2464,7 +2494,7 @@ shard_common_lookup_shards(call_frame_t *frame, xlator_t *this, inode_t *inode,
c460ee
     else
c460ee
         gf_uuid_copy(gfid, local->base_gfid);
c460ee
 
c460ee
-    while (shard_idx_iter <= last_block) {
c460ee
+    while (shard_idx_iter <= lookup_count) {
c460ee
         if (local->inode_list[i]) {
c460ee
             i++;
c460ee
             shard_idx_iter++;
c460ee
@@ -5651,6 +5681,8 @@ shard_common_inode_write_post_resolve_handler(call_frame_t *frame,
c460ee
         shard_common_lookup_shards(
c460ee
             frame, this, local->resolver_base_inode,
c460ee
             shard_common_inode_write_post_lookup_shards_handler);
c460ee
+    } else if (local->create_count) {
c460ee
+        shard_common_inode_write_post_lookup_shards_handler(frame, this);
c460ee
     } else {
c460ee
         shard_common_inode_write_do(frame, this);
c460ee
     }
c460ee
-- 
c460ee
1.8.3.1
c460ee