c5d8c8
From 1b86a4bda540ff4cf307c7f38d3041318636ecb7 Mon Sep 17 00:00:00 2001
c5d8c8
From: Vinayakswami Hariharmath <vharihar@redhat.com>
c5d8c8
Date: Thu, 6 Aug 2020 14:39:59 +0530
c5d8c8
Subject: [PATCH 569/584] features/shard: optimization over shard lookup in
c5d8c8
 case of prealloc
c5d8c8
c5d8c8
Assume that we are preallocating a VM of size 1TB with a shard
c5d8c8
block size of 64MB then there will be ~16k shards.
c5d8c8
c5d8c8
This creation happens in 2 steps in the shard_fallocate() path, i.e.
c5d8c8
c5d8c8
1. lookup for the shards if any already present and
c5d8c8
2. mknod over those shards that do not exist.
c5d8c8
c5d8c8
But in case of fresh creation, we don't have to lookup for all
c5d8c8
shards which are not present as the file size will be 0.
c5d8c8
Through this, we can save lookup on all shards which are not
c5d8c8
present. This optimization is quite useful in the case of
c5d8c8
preallocating big vm.
c5d8c8
c5d8c8
Also if the file is already present and the call is to
c5d8c8
extend it to a bigger size then we need not lookup for non-
c5d8c8
existent shards. Just lookup preexisting shards, populate
c5d8c8
the inodes and issue mknod on extended size.
c5d8c8
c5d8c8
Backport of:
c5d8c8
> Upstream-patch: https://review.gluster.org/#/c/glusterfs/+/24813/
c5d8c8
> Fixes: #1425
c5d8c8
> Change-Id: I60036fe8302c696e0ca80ff11ab0ef5bcdbd7880
c5d8c8
> Signed-off-by: Vinayakswami Hariharmath <vharihar@redhat.com>
c5d8c8
c5d8c8
BUG: 1925425
c5d8c8
Change-Id: I60036fe8302c696e0ca80ff11ab0ef5bcdbd7880
c5d8c8
Signed-off-by: Vinayakswami Hariharmath <vharihar@redhat.com>
c5d8c8
Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/244963
c5d8c8
Tested-by: RHGS Build Bot <nigelb@redhat.com>
c5d8c8
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
c5d8c8
---
c5d8c8
 tests/bugs/shard/issue-1425.t      | 45 +++++++++++++++++++++++++++++++++++++
c5d8c8
 xlators/features/shard/src/shard.c | 46 ++++++++++++++++++++++++++++++++------
c5d8c8
 2 files changed, 84 insertions(+), 7 deletions(-)
c5d8c8
 create mode 100644 tests/bugs/shard/issue-1425.t
c5d8c8
c5d8c8
diff --git a/tests/bugs/shard/issue-1425.t b/tests/bugs/shard/issue-1425.t
c5d8c8
new file mode 100644
c5d8c8
index 0000000..bbe82c0
c5d8c8
--- /dev/null
c5d8c8
+++ b/tests/bugs/shard/issue-1425.t
c5d8c8
@@ -0,0 +1,45 @@
c5d8c8
+#!/bin/bash
c5d8c8
+
c5d8c8
+. $(dirname $0)/../../include.rc
c5d8c8
+. $(dirname $0)/../../volume.rc
c5d8c8
+
c5d8c8
+cleanup;
c5d8c8
+
c5d8c8
+FILE_COUNT_TIME=5
c5d8c8
+
c5d8c8
+function get_file_count {
c5d8c8
+    ls $1* | wc -l
c5d8c8
+}
c5d8c8
+
c5d8c8
+TEST glusterd
c5d8c8
+TEST pidof glusterd
c5d8c8
+TEST $CLI volume create $V0 $H0:$B0/${V0}0
c5d8c8
+TEST $CLI volume set $V0 features.shard on
c5d8c8
+TEST $CLI volume set $V0 features.shard-block-size 4MB
c5d8c8
+TEST $CLI volume start $V0
c5d8c8
+TEST $CLI volume profile $V0 start
c5d8c8
+
c5d8c8
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0
c5d8c8
+
c5d8c8
+TEST fallocate -l 20M $M0/foo
c5d8c8
+gfid_new=$(get_gfid_string $M0/foo)
c5d8c8
+
c5d8c8
+# Check for the base shard
c5d8c8
+TEST stat $M0/foo
c5d8c8
+TEST stat $B0/${V0}0/foo
c5d8c8
+
c5d8c8
+# There should be 4 associated shards
c5d8c8
+EXPECT_WITHIN $FILE_COUNT_TIME 4 get_file_count $B0/${V0}0/.shard/$gfid_new
c5d8c8
+
c5d8c8
+# There should be 1+4 shards and we expect 4 lookups less than on the build without this patch
c5d8c8
+EXPECT "21" echo `$CLI volume profile $V0 info incremental | grep -w LOOKUP | awk '{print $8}'`
c5d8c8
+
c5d8c8
+# Delete the base shard and check shards get cleaned up
c5d8c8
+TEST unlink $M0/foo
c5d8c8
+
c5d8c8
+TEST ! stat $M0/foo
c5d8c8
+TEST ! stat $B0/${V0}0/foo
c5d8c8
+
c5d8c8
+# There should be no shards now
c5d8c8
+EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}0/.shard/$gfid_new
c5d8c8
+cleanup
c5d8c8
diff --git a/xlators/features/shard/src/shard.c b/xlators/features/shard/src/shard.c
c5d8c8
index 2ba4528..a6ad1b8 100644
c5d8c8
--- a/xlators/features/shard/src/shard.c
c5d8c8
+++ b/xlators/features/shard/src/shard.c
c5d8c8
@@ -995,6 +995,10 @@ shard_initiate_evicted_inode_fsync(xlator_t *this, inode_t *inode)
c5d8c8
 }
c5d8c8
 
c5d8c8
 int
c5d8c8
+shard_common_inode_write_post_lookup_shards_handler(call_frame_t *frame,
c5d8c8
+                                                    xlator_t *this);
c5d8c8
+
c5d8c8
+int
c5d8c8
 shard_common_resolve_shards(call_frame_t *frame, xlator_t *this,
c5d8c8
                             shard_post_resolve_fop_handler_t post_res_handler)
c5d8c8
 {
c5d8c8
@@ -1011,21 +1015,47 @@ shard_common_resolve_shards(call_frame_t *frame, xlator_t *this,
c5d8c8
     inode_t *fsync_inode = NULL;
c5d8c8
     shard_priv_t *priv = NULL;
c5d8c8
     shard_local_t *local = NULL;
c5d8c8
+    uint64_t resolve_count = 0;
c5d8c8
 
c5d8c8
     priv = this->private;
c5d8c8
     local = frame->local;
c5d8c8
     local->call_count = 0;
c5d8c8
     shard_idx_iter = local->first_block;
c5d8c8
     res_inode = local->resolver_base_inode;
c5d8c8
+
c5d8c8
+    if ((local->op_ret < 0) || (local->resolve_not))
c5d8c8
+        goto out;
c5d8c8
+
c5d8c8
+    /* If this prealloc FOP is for fresh file creation, then the size of the
c5d8c8
+     * file will be 0. Then there will be no shards associated with this file.
c5d8c8
+     * So we can skip the lookup process for the shards which do not exist
c5d8c8
+     * and directly issue mknod to create shards.
c5d8c8
+     *
c5d8c8
+     * In case the prealloc fop is to extend the preallocated file to bigger
c5d8c8
+     * size then just lookup and populate inodes of existing shards and
c5d8c8
+     * update the create count
c5d8c8
+     */
c5d8c8
+    if (local->fop == GF_FOP_FALLOCATE) {
c5d8c8
+        if (!local->prebuf.ia_size) {
c5d8c8
+            local->inode_list[0] = inode_ref(res_inode);
c5d8c8
+            local->create_count = local->last_block;
c5d8c8
+            shard_common_inode_write_post_lookup_shards_handler(frame, this);
c5d8c8
+            return 0;
c5d8c8
+        }
c5d8c8
+        if (local->prebuf.ia_size < local->total_size)
c5d8c8
+            local->create_count = local->last_block -
c5d8c8
+                                  ((local->prebuf.ia_size - 1) /
c5d8c8
+                                   local->block_size);
c5d8c8
+    }
c5d8c8
+
c5d8c8
+    resolve_count = local->last_block - local->create_count;
c5d8c8
+
c5d8c8
     if (res_inode)
c5d8c8
         gf_uuid_copy(gfid, res_inode->gfid);
c5d8c8
     else
c5d8c8
         gf_uuid_copy(gfid, local->base_gfid);
c5d8c8
 
c5d8c8
-    if ((local->op_ret < 0) || (local->resolve_not))
c5d8c8
-        goto out;
c5d8c8
-
c5d8c8
-    while (shard_idx_iter <= local->last_block) {
c5d8c8
+    while (shard_idx_iter <= resolve_count) {
c5d8c8
         i++;
c5d8c8
         if (shard_idx_iter == 0) {
c5d8c8
             local->inode_list[i] = inode_ref(res_inode);
c5d8c8
@@ -2434,7 +2464,7 @@ shard_common_lookup_shards(call_frame_t *frame, xlator_t *this, inode_t *inode,
c5d8c8
     int count = 0;
c5d8c8
     int call_count = 0;
c5d8c8
     int32_t shard_idx_iter = 0;
c5d8c8
-    int last_block = 0;
c5d8c8
+    int lookup_count = 0;
c5d8c8
     char path[PATH_MAX] = {
c5d8c8
         0,
c5d8c8
     };
c5d8c8
@@ -2454,7 +2484,7 @@ shard_common_lookup_shards(call_frame_t *frame, xlator_t *this, inode_t *inode,
c5d8c8
     local = frame->local;
c5d8c8
     count = call_count = local->call_count;
c5d8c8
     shard_idx_iter = local->first_block;
c5d8c8
-    last_block = local->last_block;
c5d8c8
+    lookup_count = local->last_block - local->create_count;
c5d8c8
     local->pls_fop_handler = handler;
c5d8c8
     if (local->lookup_shards_barriered)
c5d8c8
         local->barrier.waitfor = local->call_count;
c5d8c8
@@ -2464,7 +2494,7 @@ shard_common_lookup_shards(call_frame_t *frame, xlator_t *this, inode_t *inode,
c5d8c8
     else
c5d8c8
         gf_uuid_copy(gfid, local->base_gfid);
c5d8c8
 
c5d8c8
-    while (shard_idx_iter <= last_block) {
c5d8c8
+    while (shard_idx_iter <= lookup_count) {
c5d8c8
         if (local->inode_list[i]) {
c5d8c8
             i++;
c5d8c8
             shard_idx_iter++;
c5d8c8
@@ -5651,6 +5681,8 @@ shard_common_inode_write_post_resolve_handler(call_frame_t *frame,
c5d8c8
         shard_common_lookup_shards(
c5d8c8
             frame, this, local->resolver_base_inode,
c5d8c8
             shard_common_inode_write_post_lookup_shards_handler);
c5d8c8
+    } else if (local->create_count) {
c5d8c8
+        shard_common_inode_write_post_lookup_shards_handler(frame, this);
c5d8c8
     } else {
c5d8c8
         shard_common_inode_write_do(frame, this);
c5d8c8
     }
c5d8c8
-- 
c5d8c8
1.8.3.1
c5d8c8