256ebe
From 369c5772a722b6e346ec8b41f992112785366778 Mon Sep 17 00:00:00 2001
256ebe
From: Krutika Dhananjay <kdhananj@redhat.com>
256ebe
Date: Wed, 8 May 2019 13:00:51 +0530
256ebe
Subject: [PATCH 189/192] features/shard: Fix block-count accounting upon
256ebe
 truncate to lower size
256ebe
256ebe
    > Upstream: https://review.gluster.org/22681
256ebe
    > BUG: 1705884
256ebe
    > Change-Id: I9128a192e9bf8c3c3a959e96b7400879d03d7c53
256ebe
256ebe
The way delta_blocks is computed in shard is incorrect, when a file
256ebe
is truncated to a lower size. The accounting only considers change
256ebe
in size of the last of the truncated shards.
256ebe
256ebe
FIX:
256ebe
256ebe
Get the block-count of each shard just before an unlink at posix in
256ebe
xdata.  Their summation plus the change in size of last shard
256ebe
(from an actual truncate) is used to compute delta_blocks which is
256ebe
used in the xattrop for size update.
256ebe
256ebe
Change-Id: I9128a192e9bf8c3c3a959e96b7400879d03d7c53
256ebe
updates: bz#1668001
256ebe
Signed-off-by: Krutika Dhananjay <kdhananj@redhat.com>
256ebe
Reviewed-on: https://code.engineering.redhat.com/gerrit/173477
256ebe
Tested-by: RHGS Build Bot <nigelb@redhat.com>
256ebe
Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
256ebe
---
256ebe
 libglusterfs/src/glusterfs/glusterfs.h      |  2 +
256ebe
 tests/bugs/shard/bug-1705884.t              | 32 +++++++++++++++
256ebe
 xlators/features/shard/src/shard.c          | 60 +++++++++++++++++++++++------
256ebe
 xlators/features/shard/src/shard.h          |  2 +-
256ebe
 xlators/storage/posix/src/posix-entry-ops.c |  9 +++++
256ebe
 5 files changed, 92 insertions(+), 13 deletions(-)
256ebe
 create mode 100644 tests/bugs/shard/bug-1705884.t
256ebe
256ebe
diff --git a/libglusterfs/src/glusterfs/glusterfs.h b/libglusterfs/src/glusterfs/glusterfs.h
256ebe
index 516b497..9ec2365 100644
256ebe
--- a/libglusterfs/src/glusterfs/glusterfs.h
256ebe
+++ b/libglusterfs/src/glusterfs/glusterfs.h
256ebe
@@ -328,6 +328,8 @@ enum gf_internal_fop_indicator {
256ebe
 #define GF_RESPONSE_LINK_COUNT_XDATA "gf_response_link_count"
256ebe
 #define GF_REQUEST_LINK_COUNT_XDATA "gf_request_link_count"
256ebe
 
256ebe
+#define GF_GET_FILE_BLOCK_COUNT "gf_get_file_block_count"
256ebe
+
256ebe
 #define CTR_ATTACH_TIER_LOOKUP "ctr_attach_tier_lookup"
256ebe
 
256ebe
 #define CLIENT_CMD_CONNECT "trusted.glusterfs.client-connect"
256ebe
diff --git a/tests/bugs/shard/bug-1705884.t b/tests/bugs/shard/bug-1705884.t
256ebe
new file mode 100644
256ebe
index 0000000..f6e5037
256ebe
--- /dev/null
256ebe
+++ b/tests/bugs/shard/bug-1705884.t
256ebe
@@ -0,0 +1,32 @@
256ebe
+#!/bin/bash
256ebe
+
256ebe
+. $(dirname $0)/../../include.rc
256ebe
+. $(dirname $0)/../../volume.rc
256ebe
+. $(dirname $0)/../../fallocate.rc
256ebe
+
256ebe
+cleanup
256ebe
+
256ebe
+require_fallocate -l 1m $M0/file
256ebe
+
256ebe
+TEST glusterd
256ebe
+TEST pidof glusterd
256ebe
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}
256ebe
+TEST $CLI volume set $V0 features.shard on
256ebe
+TEST $CLI volume set $V0 performance.write-behind off
256ebe
+TEST $CLI volume set $V0 performance.stat-prefetch off
256ebe
+TEST $CLI volume start $V0
256ebe
+
256ebe
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0
256ebe
+
256ebe
+TEST fallocate -l 200M $M0/foo
256ebe
+EXPECT `echo "$(( ( 200 * 1024 * 1024 ) / 512 ))"`  stat -c %b $M0/foo
256ebe
+TEST truncate -s 0 $M0/foo
256ebe
+EXPECT "0" stat -c %b $M0/foo
256ebe
+TEST fallocate -l 100M $M0/foo
256ebe
+EXPECT `echo "$(( ( 100 * 1024 * 1024 ) / 512 ))"`  stat -c %b $M0/foo
256ebe
+
256ebe
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
256ebe
+TEST $CLI volume stop $V0
256ebe
+TEST $CLI volume delete $V0
256ebe
+
256ebe
+cleanup
256ebe
diff --git a/xlators/features/shard/src/shard.c b/xlators/features/shard/src/shard.c
256ebe
index c1799ad..b248767 100644
256ebe
--- a/xlators/features/shard/src/shard.c
256ebe
+++ b/xlators/features/shard/src/shard.c
256ebe
@@ -1135,6 +1135,7 @@ shard_update_file_size(call_frame_t *frame, xlator_t *this, fd_t *fd,
256ebe
 {
256ebe
     int ret = -1;
256ebe
     int64_t *size_attr = NULL;
256ebe
+    int64_t delta_blocks = 0;
256ebe
     inode_t *inode = NULL;
256ebe
     shard_local_t *local = NULL;
256ebe
     dict_t *xattr_req = NULL;
256ebe
@@ -1156,13 +1157,13 @@ shard_update_file_size(call_frame_t *frame, xlator_t *this, fd_t *fd,
256ebe
 
256ebe
     /* If both size and block count have not changed, then skip the xattrop.
256ebe
      */
256ebe
-    if ((local->delta_size + local->hole_size == 0) &&
256ebe
-        (local->delta_blocks == 0)) {
256ebe
+    delta_blocks = GF_ATOMIC_GET(local->delta_blocks);
256ebe
+    if ((local->delta_size + local->hole_size == 0) && (delta_blocks == 0)) {
256ebe
         goto out;
256ebe
     }
256ebe
 
256ebe
     ret = shard_set_size_attrs(local->delta_size + local->hole_size,
256ebe
-                               local->delta_blocks, &size_attr);
256ebe
+                               delta_blocks, &size_attr);
256ebe
     if (ret) {
256ebe
         gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_SIZE_SET_FAILED,
256ebe
                "Failed to set size attrs for %s", uuid_utoa(inode->gfid));
256ebe
@@ -1947,6 +1948,7 @@ shard_truncate_last_shard_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
256ebe
                               dict_t *xdata)
256ebe
 {
256ebe
     inode_t *inode = NULL;
256ebe
+    int64_t delta_blocks = 0;
256ebe
     shard_local_t *local = NULL;
256ebe
 
256ebe
     local = frame->local;
256ebe
@@ -1967,14 +1969,15 @@ shard_truncate_last_shard_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
256ebe
     }
256ebe
 
256ebe
     local->postbuf.ia_size = local->offset;
256ebe
-    local->postbuf.ia_blocks -= (prebuf->ia_blocks - postbuf->ia_blocks);
256ebe
     /* Let the delta be negative. We want xattrop to do subtraction */
256ebe
     local->delta_size = local->postbuf.ia_size - local->prebuf.ia_size;
256ebe
-    local->delta_blocks = postbuf->ia_blocks - prebuf->ia_blocks;
256ebe
+    delta_blocks = GF_ATOMIC_ADD(local->delta_blocks,
256ebe
+                                 postbuf->ia_blocks - prebuf->ia_blocks);
256ebe
+    GF_ASSERT(delta_blocks <= 0);
256ebe
+    local->postbuf.ia_blocks += delta_blocks;
256ebe
     local->hole_size = 0;
256ebe
 
256ebe
-    shard_inode_ctx_set(inode, this, postbuf, 0, SHARD_MASK_TIMES);
256ebe
-
256ebe
+    shard_inode_ctx_set(inode, this, &local->postbuf, 0, SHARD_MASK_TIMES);
256ebe
     shard_update_file_size(frame, this, NULL, &local->loc,
256ebe
                            shard_post_update_size_truncate_handler);
256ebe
     return 0;
256ebe
@@ -2034,8 +2037,10 @@ shard_truncate_htol_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
256ebe
                         struct iatt *preparent, struct iatt *postparent,
256ebe
                         dict_t *xdata)
256ebe
 {
256ebe
+    int ret = 0;
256ebe
     int call_count = 0;
256ebe
     int shard_block_num = (long)cookie;
256ebe
+    uint64_t block_count = 0;
256ebe
     shard_local_t *local = NULL;
256ebe
 
256ebe
     local = frame->local;
256ebe
@@ -2045,6 +2050,16 @@ shard_truncate_htol_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
256ebe
         local->op_errno = op_errno;
256ebe
         goto done;
256ebe
     }
256ebe
+    ret = dict_get_uint64(xdata, GF_GET_FILE_BLOCK_COUNT, &block_count);
256ebe
+    if (!ret) {
256ebe
+        GF_ATOMIC_SUB(local->delta_blocks, block_count);
256ebe
+    } else {
256ebe
+        /* dict_get failed possibly due to a heterogeneous cluster? */
256ebe
+        gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED,
256ebe
+               "Failed to get key %s from dict during truncate of gfid %s",
256ebe
+               GF_GET_FILE_BLOCK_COUNT,
256ebe
+               uuid_utoa(local->resolver_base_inode->gfid));
256ebe
+    }
256ebe
 
256ebe
     shard_unlink_block_inode(local, shard_block_num);
256ebe
 done:
256ebe
@@ -2074,6 +2089,7 @@ shard_truncate_htol(call_frame_t *frame, xlator_t *this, inode_t *inode)
256ebe
     gf_boolean_t wind_failed = _gf_false;
256ebe
     shard_local_t *local = NULL;
256ebe
     shard_priv_t *priv = NULL;
256ebe
+    dict_t *xdata_req = NULL;
256ebe
 
256ebe
     local = frame->local;
256ebe
     priv = this->private;
256ebe
@@ -2101,7 +2117,7 @@ shard_truncate_htol(call_frame_t *frame, xlator_t *this, inode_t *inode)
256ebe
         local->postbuf.ia_size = local->offset;
256ebe
         local->postbuf.ia_blocks = local->prebuf.ia_blocks;
256ebe
         local->delta_size = local->postbuf.ia_size - local->prebuf.ia_size;
256ebe
-        local->delta_blocks = 0;
256ebe
+        GF_ATOMIC_INIT(local->delta_blocks, 0);
256ebe
         local->hole_size = 0;
256ebe
         shard_update_file_size(frame, this, local->fd, &local->loc,
256ebe
                                shard_post_update_size_truncate_handler);
256ebe
@@ -2110,6 +2126,21 @@ shard_truncate_htol(call_frame_t *frame, xlator_t *this, inode_t *inode)
256ebe
 
256ebe
     local->call_count = call_count;
256ebe
     i = 1;
256ebe
+    xdata_req = dict_new();
256ebe
+    if (!xdata_req) {
256ebe
+        shard_common_failure_unwind(local->fop, frame, -1, ENOMEM);
256ebe
+        return 0;
256ebe
+    }
256ebe
+    ret = dict_set_uint64(xdata_req, GF_GET_FILE_BLOCK_COUNT, 8 * 8);
256ebe
+    if (ret) {
256ebe
+        gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED,
256ebe
+               "Failed to set key %s into dict during truncate of %s",
256ebe
+               GF_GET_FILE_BLOCK_COUNT,
256ebe
+               uuid_utoa(local->resolver_base_inode->gfid));
256ebe
+        dict_unref(xdata_req);
256ebe
+        shard_common_failure_unwind(local->fop, frame, -1, ENOMEM);
256ebe
+        return 0;
256ebe
+    }
256ebe
 
256ebe
     SHARD_SET_ROOT_FS_ID(frame, local);
256ebe
     while (cur_block <= last_block) {
256ebe
@@ -2148,7 +2179,7 @@ shard_truncate_htol(call_frame_t *frame, xlator_t *this, inode_t *inode)
256ebe
 
256ebe
         STACK_WIND_COOKIE(frame, shard_truncate_htol_cbk,
256ebe
                           (void *)(long)cur_block, FIRST_CHILD(this),
256ebe
-                          FIRST_CHILD(this)->fops->unlink, &loc, 0, NULL);
256ebe
+                          FIRST_CHILD(this)->fops->unlink, &loc, 0, xdata_req);
256ebe
         loc_wipe(&loc;;
256ebe
     next:
256ebe
         i++;
256ebe
@@ -2156,6 +2187,7 @@ shard_truncate_htol(call_frame_t *frame, xlator_t *this, inode_t *inode)
256ebe
         if (!--call_count)
256ebe
             break;
256ebe
     }
256ebe
+    dict_unref(xdata_req);
256ebe
     return 0;
256ebe
 }
256ebe
 
256ebe
@@ -2608,7 +2640,7 @@ shard_post_lookup_truncate_handler(call_frame_t *frame, xlator_t *this)
256ebe
          */
256ebe
         local->hole_size = local->offset - local->prebuf.ia_size;
256ebe
         local->delta_size = 0;
256ebe
-        local->delta_blocks = 0;
256ebe
+        GF_ATOMIC_INIT(local->delta_blocks, 0);
256ebe
         local->postbuf.ia_size = local->offset;
256ebe
         tmp_stbuf.ia_size = local->offset;
256ebe
         shard_inode_ctx_set(local->loc.inode, this, &tmp_stbuf, 0,
256ebe
@@ -2624,7 +2656,7 @@ shard_post_lookup_truncate_handler(call_frame_t *frame, xlator_t *this)
256ebe
          */
256ebe
         local->hole_size = 0;
256ebe
         local->delta_size = (local->offset - local->prebuf.ia_size);
256ebe
-        local->delta_blocks = 0;
256ebe
+        GF_ATOMIC_INIT(local->delta_blocks, 0);
256ebe
         tmp_stbuf.ia_size = local->offset;
256ebe
         shard_inode_ctx_set(local->loc.inode, this, &tmp_stbuf, 0,
256ebe
                             SHARD_INODE_WRITE_MASK);
256ebe
@@ -2680,6 +2712,7 @@ shard_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
256ebe
     if (!local->xattr_req)
256ebe
         goto err;
256ebe
     local->resolver_base_inode = loc->inode;
256ebe
+    GF_ATOMIC_INIT(local->delta_blocks, 0);
256ebe
 
256ebe
     shard_lookup_base_file(frame, this, &local->loc,
256ebe
                            shard_post_lookup_truncate_handler);
256ebe
@@ -2735,6 +2768,7 @@ shard_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
256ebe
     local->loc.inode = inode_ref(fd->inode);
256ebe
     gf_uuid_copy(local->loc.gfid, fd->inode->gfid);
256ebe
     local->resolver_base_inode = fd->inode;
256ebe
+    GF_ATOMIC_INIT(local->delta_blocks, 0);
256ebe
 
256ebe
     shard_lookup_base_file(frame, this, &local->loc,
256ebe
                            shard_post_lookup_truncate_handler);
256ebe
@@ -5295,7 +5329,8 @@ shard_common_inode_write_do_cbk(call_frame_t *frame, void *cookie,
256ebe
             local->op_errno = op_errno;
256ebe
         } else {
256ebe
             local->written_size += op_ret;
256ebe
-            local->delta_blocks += (post->ia_blocks - pre->ia_blocks);
256ebe
+            GF_ATOMIC_ADD(local->delta_blocks,
256ebe
+                          post->ia_blocks - pre->ia_blocks);
256ebe
             local->delta_size += (post->ia_size - pre->ia_size);
256ebe
             shard_inode_ctx_set(local->fd->inode, this, post, 0,
256ebe
                                 SHARD_MASK_TIMES);
256ebe
@@ -6599,6 +6634,7 @@ shard_common_inode_write_begin(call_frame_t *frame, xlator_t *this,
256ebe
     local->fd = fd_ref(fd);
256ebe
     local->block_size = block_size;
256ebe
     local->resolver_base_inode = local->fd->inode;
256ebe
+    GF_ATOMIC_INIT(local->delta_blocks, 0);
256ebe
 
256ebe
     local->loc.inode = inode_ref(fd->inode);
256ebe
     gf_uuid_copy(local->loc.gfid, fd->inode->gfid);
256ebe
diff --git a/xlators/features/shard/src/shard.h b/xlators/features/shard/src/shard.h
256ebe
index cd6a663..04abd62 100644
256ebe
--- a/xlators/features/shard/src/shard.h
256ebe
+++ b/xlators/features/shard/src/shard.h
256ebe
@@ -275,7 +275,7 @@ typedef struct shard_local {
256ebe
     size_t req_size;
256ebe
     size_t readdir_size;
256ebe
     int64_t delta_size;
256ebe
-    int64_t delta_blocks;
256ebe
+    gf_atomic_t delta_blocks;
256ebe
     loc_t loc;
256ebe
     loc_t dot_shard_loc;
256ebe
     loc_t dot_shard_rm_loc;
256ebe
diff --git a/xlators/storage/posix/src/posix-entry-ops.c b/xlators/storage/posix/src/posix-entry-ops.c
256ebe
index b24a052..34ee2b8 100644
256ebe
--- a/xlators/storage/posix/src/posix-entry-ops.c
256ebe
+++ b/xlators/storage/posix/src/posix-entry-ops.c
256ebe
@@ -1071,6 +1071,7 @@ posix_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
256ebe
     char *real_path = NULL;
256ebe
     char *par_path = NULL;
256ebe
     int32_t fd = -1;
256ebe
+    int ret = -1;
256ebe
     struct iatt stbuf = {
256ebe
         0,
256ebe
     };
256ebe
@@ -1235,6 +1236,14 @@ posix_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
256ebe
         goto out;
256ebe
     }
256ebe
 
256ebe
+    if (xdata && dict_get(xdata, GF_GET_FILE_BLOCK_COUNT)) {
256ebe
+        ret = dict_set_uint64(unwind_dict, GF_GET_FILE_BLOCK_COUNT,
256ebe
+                              stbuf.ia_blocks);
256ebe
+        if (ret)
256ebe
+            gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_SET_XDATA_FAIL,
256ebe
+                   "Failed to set %s in rsp dict", GF_GET_FILE_BLOCK_COUNT);
256ebe
+    }
256ebe
+
256ebe
     if (xdata && dict_get(xdata, GET_LINK_COUNT))
256ebe
         get_link_count = _gf_true;
256ebe
     op_ret = posix_unlink_gfid_handle_and_entry(frame, this, real_path, &stbuf,
256ebe
-- 
256ebe
1.8.3.1
256ebe