Blob Blame History Raw
From 1b86a4bda540ff4cf307c7f38d3041318636ecb7 Mon Sep 17 00:00:00 2001
From: Vinayakswami Hariharmath <vharihar@redhat.com>
Date: Thu, 6 Aug 2020 14:39:59 +0530
Subject: [PATCH 569/584] features/shard: optimization over shard lookup in
 case of prealloc

Assume that we are preallocating a VM of size 1TB with a shard
block size of 64MB then there will be ~16k shards.

This creation happens in 2 steps in the shard_fallocate() path, i.e.

1. lookup for the shards, if any are already present, and
2. mknod over those shards that do not exist.

But in the case of fresh creation, we don't have to look up all
shards which are not present as the file size will be 0.
Through this, we can save a lookup on all shards which are not
present. This optimization is quite useful in the case of
preallocating a big VM.

Also, if the file is already present and the call is to
extend it to a bigger size, then we need not look up non-
existent shards. Just look up preexisting shards, populate
the inodes and issue mknod on the extended size.

Backport of:
> Upstream-patch: https://review.gluster.org/#/c/glusterfs/+/24813/
> Fixes: #1425
> Change-Id: I60036fe8302c696e0ca80ff11ab0ef5bcdbd7880
> Signed-off-by: Vinayakswami Hariharmath <vharihar@redhat.com>

BUG: 1925425
Change-Id: I60036fe8302c696e0ca80ff11ab0ef5bcdbd7880
Signed-off-by: Vinayakswami Hariharmath <vharihar@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/244963
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
---
 tests/bugs/shard/issue-1425.t      | 45 +++++++++++++++++++++++++++++++++++++
 xlators/features/shard/src/shard.c | 46 ++++++++++++++++++++++++++++++++------
 2 files changed, 84 insertions(+), 7 deletions(-)
 create mode 100644 tests/bugs/shard/issue-1425.t

diff --git a/tests/bugs/shard/issue-1425.t b/tests/bugs/shard/issue-1425.t
new file mode 100644
index 0000000..bbe82c0
--- /dev/null
+++ b/tests/bugs/shard/issue-1425.t
@@ -0,0 +1,45 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+FILE_COUNT_TIME=5
+
+function get_file_count {
+    ls $1* | wc -l
+}
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 $H0:$B0/${V0}0
+TEST $CLI volume set $V0 features.shard on
+TEST $CLI volume set $V0 features.shard-block-size 4MB
+TEST $CLI volume start $V0
+TEST $CLI volume profile $V0 start
+
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0
+
+TEST fallocate -l 20M $M0/foo
+gfid_new=$(get_gfid_string $M0/foo)
+
+# Check for the base shard
+TEST stat $M0/foo
+TEST stat $B0/${V0}0/foo
+
+# There should be 4 associated shards
+EXPECT_WITHIN $FILE_COUNT_TIME 4 get_file_count $B0/${V0}0/.shard/$gfid_new
+
+# There should be 1+4 shards and we expect 4 lookups less than on the build without this patch
+EXPECT "21" echo `$CLI volume profile $V0 info incremental | grep -w LOOKUP | awk '{print $8}'`
+
+# Delete the base shard and check shards get cleaned up
+TEST unlink $M0/foo
+
+TEST ! stat $M0/foo
+TEST ! stat $B0/${V0}0/foo
+
+# There should be no shards now
+EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}0/.shard/$gfid_new
+cleanup
diff --git a/xlators/features/shard/src/shard.c b/xlators/features/shard/src/shard.c
index 2ba4528..a6ad1b8 100644
--- a/xlators/features/shard/src/shard.c
+++ b/xlators/features/shard/src/shard.c
@@ -995,6 +995,10 @@ shard_initiate_evicted_inode_fsync(xlator_t *this, inode_t *inode)
 }
 
 int
+shard_common_inode_write_post_lookup_shards_handler(call_frame_t *frame,
+                                                    xlator_t *this);
+
+int
 shard_common_resolve_shards(call_frame_t *frame, xlator_t *this,
                             shard_post_resolve_fop_handler_t post_res_handler)
 {
@@ -1011,21 +1015,47 @@ shard_common_resolve_shards(call_frame_t *frame, xlator_t *this,
     inode_t *fsync_inode = NULL;
     shard_priv_t *priv = NULL;
     shard_local_t *local = NULL;
+    uint64_t resolve_count = 0;
 
     priv = this->private;
     local = frame->local;
     local->call_count = 0;
     shard_idx_iter = local->first_block;
     res_inode = local->resolver_base_inode;
+
+    if ((local->op_ret < 0) || (local->resolve_not))
+        goto out;
+
+    /* If this prealloc FOP is for fresh file creation, then the size of the
+     * file will be 0. Then there will be no shards associated with this file.
+     * So we can skip the lookup process for the shards which do not exist
+     * and directly issue mknod to create shards.
+     *
+     * In case the prealloc fop is to extend the preallocated file to bigger
+     * size then just lookup and populate inodes of existing shards and
+     * update the create count
+     */
+    if (local->fop == GF_FOP_FALLOCATE) {
+        if (!local->prebuf.ia_size) {
+            local->inode_list[0] = inode_ref(res_inode);
+            local->create_count = local->last_block;
+            shard_common_inode_write_post_lookup_shards_handler(frame, this);
+            return 0;
+        }
+        if (local->prebuf.ia_size < local->total_size)
+            local->create_count = local->last_block -
+                                  ((local->prebuf.ia_size - 1) /
+                                   local->block_size);
+    }
+
+    resolve_count = local->last_block - local->create_count;
+
     if (res_inode)
         gf_uuid_copy(gfid, res_inode->gfid);
     else
         gf_uuid_copy(gfid, local->base_gfid);
 
-    if ((local->op_ret < 0) || (local->resolve_not))
-        goto out;
-
-    while (shard_idx_iter <= local->last_block) {
+    while (shard_idx_iter <= resolve_count) {
         i++;
         if (shard_idx_iter == 0) {
             local->inode_list[i] = inode_ref(res_inode);
@@ -2434,7 +2464,7 @@ shard_common_lookup_shards(call_frame_t *frame, xlator_t *this, inode_t *inode,
     int count = 0;
     int call_count = 0;
     int32_t shard_idx_iter = 0;
-    int last_block = 0;
+    int lookup_count = 0;
     char path[PATH_MAX] = {
         0,
     };
@@ -2454,7 +2484,7 @@ shard_common_lookup_shards(call_frame_t *frame, xlator_t *this, inode_t *inode,
     local = frame->local;
     count = call_count = local->call_count;
     shard_idx_iter = local->first_block;
-    last_block = local->last_block;
+    lookup_count = local->last_block - local->create_count;
     local->pls_fop_handler = handler;
     if (local->lookup_shards_barriered)
         local->barrier.waitfor = local->call_count;
@@ -2464,7 +2494,7 @@ shard_common_lookup_shards(call_frame_t *frame, xlator_t *this, inode_t *inode,
     else
         gf_uuid_copy(gfid, local->base_gfid);
 
-    while (shard_idx_iter <= last_block) {
+    while (shard_idx_iter <= lookup_count) {
         if (local->inode_list[i]) {
             i++;
             shard_idx_iter++;
@@ -5651,6 +5681,8 @@ shard_common_inode_write_post_resolve_handler(call_frame_t *frame,
         shard_common_lookup_shards(
             frame, this, local->resolver_base_inode,
             shard_common_inode_write_post_lookup_shards_handler);
+    } else if (local->create_count) {
+        shard_common_inode_write_post_lookup_shards_handler(frame, this);
     } else {
         shard_common_inode_write_do(frame, this);
     }
-- 
1.8.3.1