50dc83
From 369c5772a722b6e346ec8b41f992112785366778 Mon Sep 17 00:00:00 2001
50dc83
From: Krutika Dhananjay <kdhananj@redhat.com>
50dc83
Date: Wed, 8 May 2019 13:00:51 +0530
50dc83
Subject: [PATCH 189/192] features/shard: Fix block-count accounting upon
50dc83
 truncate to lower size
50dc83
50dc83
    > Upstream: https://review.gluster.org/22681
50dc83
    > BUG: 1705884
50dc83
    > Change-Id: I9128a192e9bf8c3c3a959e96b7400879d03d7c53
50dc83
50dc83
The way delta_blocks is computed in shard is incorrect, when a file
50dc83
is truncated to a lower size. The accounting only considers change
50dc83
in size of the last of the truncated shards.
50dc83
50dc83
FIX:
50dc83
50dc83
Get the block-count of each shard just before an unlink at posix in
50dc83
xdata.  Their summation plus the change in size of last shard
50dc83
(from an actual truncate) is used to compute delta_blocks which is
50dc83
used in the xattrop for size update.
50dc83
50dc83
Change-Id: I9128a192e9bf8c3c3a959e96b7400879d03d7c53
50dc83
updates: bz#1668001
50dc83
Signed-off-by: Krutika Dhananjay <kdhananj@redhat.com>
50dc83
Reviewed-on: https://code.engineering.redhat.com/gerrit/173477
50dc83
Tested-by: RHGS Build Bot <nigelb@redhat.com>
50dc83
Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
50dc83
---
50dc83
 libglusterfs/src/glusterfs/glusterfs.h      |  2 +
50dc83
 tests/bugs/shard/bug-1705884.t              | 32 +++++++++++++++
50dc83
 xlators/features/shard/src/shard.c          | 60 +++++++++++++++++++++++------
50dc83
 xlators/features/shard/src/shard.h          |  2 +-
50dc83
 xlators/storage/posix/src/posix-entry-ops.c |  9 +++++
50dc83
 5 files changed, 92 insertions(+), 13 deletions(-)
50dc83
 create mode 100644 tests/bugs/shard/bug-1705884.t
50dc83
50dc83
diff --git a/libglusterfs/src/glusterfs/glusterfs.h b/libglusterfs/src/glusterfs/glusterfs.h
50dc83
index 516b497..9ec2365 100644
50dc83
--- a/libglusterfs/src/glusterfs/glusterfs.h
50dc83
+++ b/libglusterfs/src/glusterfs/glusterfs.h
50dc83
@@ -328,6 +328,8 @@ enum gf_internal_fop_indicator {
50dc83
 #define GF_RESPONSE_LINK_COUNT_XDATA "gf_response_link_count"
50dc83
 #define GF_REQUEST_LINK_COUNT_XDATA "gf_request_link_count"
50dc83
 
50dc83
+#define GF_GET_FILE_BLOCK_COUNT "gf_get_file_block_count"
50dc83
+
50dc83
 #define CTR_ATTACH_TIER_LOOKUP "ctr_attach_tier_lookup"
50dc83
 
50dc83
 #define CLIENT_CMD_CONNECT "trusted.glusterfs.client-connect"
50dc83
diff --git a/tests/bugs/shard/bug-1705884.t b/tests/bugs/shard/bug-1705884.t
50dc83
new file mode 100644
50dc83
index 0000000..f6e5037
50dc83
--- /dev/null
50dc83
+++ b/tests/bugs/shard/bug-1705884.t
50dc83
@@ -0,0 +1,32 @@
50dc83
+#!/bin/bash
50dc83
+
50dc83
+. $(dirname $0)/../../include.rc
50dc83
+. $(dirname $0)/../../volume.rc
50dc83
+. $(dirname $0)/../../fallocate.rc
50dc83
+
50dc83
+cleanup
50dc83
+
50dc83
+require_fallocate -l 1m $M0/file
50dc83
+
50dc83
+TEST glusterd
50dc83
+TEST pidof glusterd
50dc83
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}
50dc83
+TEST $CLI volume set $V0 features.shard on
50dc83
+TEST $CLI volume set $V0 performance.write-behind off
50dc83
+TEST $CLI volume set $V0 performance.stat-prefetch off
50dc83
+TEST $CLI volume start $V0
50dc83
+
50dc83
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0
50dc83
+
50dc83
+TEST fallocate -l 200M $M0/foo
50dc83
+EXPECT `echo "$(( ( 200 * 1024 * 1024 ) / 512 ))"`  stat -c %b $M0/foo
50dc83
+TEST truncate -s 0 $M0/foo
50dc83
+EXPECT "0" stat -c %b $M0/foo
50dc83
+TEST fallocate -l 100M $M0/foo
50dc83
+EXPECT `echo "$(( ( 100 * 1024 * 1024 ) / 512 ))"`  stat -c %b $M0/foo
50dc83
+
50dc83
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
50dc83
+TEST $CLI volume stop $V0
50dc83
+TEST $CLI volume delete $V0
50dc83
+
50dc83
+cleanup
50dc83
diff --git a/xlators/features/shard/src/shard.c b/xlators/features/shard/src/shard.c
50dc83
index c1799ad..b248767 100644
50dc83
--- a/xlators/features/shard/src/shard.c
50dc83
+++ b/xlators/features/shard/src/shard.c
50dc83
@@ -1135,6 +1135,7 @@ shard_update_file_size(call_frame_t *frame, xlator_t *this, fd_t *fd,
50dc83
 {
50dc83
     int ret = -1;
50dc83
     int64_t *size_attr = NULL;
50dc83
+    int64_t delta_blocks = 0;
50dc83
     inode_t *inode = NULL;
50dc83
     shard_local_t *local = NULL;
50dc83
     dict_t *xattr_req = NULL;
50dc83
@@ -1156,13 +1157,13 @@ shard_update_file_size(call_frame_t *frame, xlator_t *this, fd_t *fd,
50dc83
 
50dc83
     /* If both size and block count have not changed, then skip the xattrop.
50dc83
      */
50dc83
-    if ((local->delta_size + local->hole_size == 0) &&
50dc83
-        (local->delta_blocks == 0)) {
50dc83
+    delta_blocks = GF_ATOMIC_GET(local->delta_blocks);
50dc83
+    if ((local->delta_size + local->hole_size == 0) && (delta_blocks == 0)) {
50dc83
         goto out;
50dc83
     }
50dc83
 
50dc83
     ret = shard_set_size_attrs(local->delta_size + local->hole_size,
50dc83
-                               local->delta_blocks, &size_attr);
50dc83
+                               delta_blocks, &size_attr);
50dc83
     if (ret) {
50dc83
         gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_SIZE_SET_FAILED,
50dc83
                "Failed to set size attrs for %s", uuid_utoa(inode->gfid));
50dc83
@@ -1947,6 +1948,7 @@ shard_truncate_last_shard_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
50dc83
                               dict_t *xdata)
50dc83
 {
50dc83
     inode_t *inode = NULL;
50dc83
+    int64_t delta_blocks = 0;
50dc83
     shard_local_t *local = NULL;
50dc83
 
50dc83
     local = frame->local;
50dc83
@@ -1967,14 +1969,15 @@ shard_truncate_last_shard_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
50dc83
     }
50dc83
 
50dc83
     local->postbuf.ia_size = local->offset;
50dc83
-    local->postbuf.ia_blocks -= (prebuf->ia_blocks - postbuf->ia_blocks);
50dc83
     /* Let the delta be negative. We want xattrop to do subtraction */
50dc83
     local->delta_size = local->postbuf.ia_size - local->prebuf.ia_size;
50dc83
-    local->delta_blocks = postbuf->ia_blocks - prebuf->ia_blocks;
50dc83
+    delta_blocks = GF_ATOMIC_ADD(local->delta_blocks,
50dc83
+                                 postbuf->ia_blocks - prebuf->ia_blocks);
50dc83
+    GF_ASSERT(delta_blocks <= 0);
50dc83
+    local->postbuf.ia_blocks += delta_blocks;
50dc83
     local->hole_size = 0;
50dc83
 
50dc83
-    shard_inode_ctx_set(inode, this, postbuf, 0, SHARD_MASK_TIMES);
50dc83
-
50dc83
+    shard_inode_ctx_set(inode, this, &local->postbuf, 0, SHARD_MASK_TIMES);
50dc83
     shard_update_file_size(frame, this, NULL, &local->loc,
50dc83
                            shard_post_update_size_truncate_handler);
50dc83
     return 0;
50dc83
@@ -2034,8 +2037,10 @@ shard_truncate_htol_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
50dc83
                         struct iatt *preparent, struct iatt *postparent,
50dc83
                         dict_t *xdata)
50dc83
 {
50dc83
+    int ret = 0;
50dc83
     int call_count = 0;
50dc83
     int shard_block_num = (long)cookie;
50dc83
+    uint64_t block_count = 0;
50dc83
     shard_local_t *local = NULL;
50dc83
 
50dc83
     local = frame->local;
50dc83
@@ -2045,6 +2050,16 @@ shard_truncate_htol_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
50dc83
         local->op_errno = op_errno;
50dc83
         goto done;
50dc83
     }
50dc83
+    ret = dict_get_uint64(xdata, GF_GET_FILE_BLOCK_COUNT, &block_count);
50dc83
+    if (!ret) {
50dc83
+        GF_ATOMIC_SUB(local->delta_blocks, block_count);
50dc83
+    } else {
50dc83
+        /* dict_get failed possibly due to a heterogeneous cluster? */
50dc83
+        gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED,
50dc83
+               "Failed to get key %s from dict during truncate of gfid %s",
50dc83
+               GF_GET_FILE_BLOCK_COUNT,
50dc83
+               uuid_utoa(local->resolver_base_inode->gfid));
50dc83
+    }
50dc83
 
50dc83
     shard_unlink_block_inode(local, shard_block_num);
50dc83
 done:
50dc83
@@ -2074,6 +2089,7 @@ shard_truncate_htol(call_frame_t *frame, xlator_t *this, inode_t *inode)
50dc83
     gf_boolean_t wind_failed = _gf_false;
50dc83
     shard_local_t *local = NULL;
50dc83
     shard_priv_t *priv = NULL;
50dc83
+    dict_t *xdata_req = NULL;
50dc83
 
50dc83
     local = frame->local;
50dc83
     priv = this->private;
50dc83
@@ -2101,7 +2117,7 @@ shard_truncate_htol(call_frame_t *frame, xlator_t *this, inode_t *inode)
50dc83
         local->postbuf.ia_size = local->offset;
50dc83
         local->postbuf.ia_blocks = local->prebuf.ia_blocks;
50dc83
         local->delta_size = local->postbuf.ia_size - local->prebuf.ia_size;
50dc83
-        local->delta_blocks = 0;
50dc83
+        GF_ATOMIC_INIT(local->delta_blocks, 0);
50dc83
         local->hole_size = 0;
50dc83
         shard_update_file_size(frame, this, local->fd, &local->loc,
50dc83
                                shard_post_update_size_truncate_handler);
50dc83
@@ -2110,6 +2126,21 @@ shard_truncate_htol(call_frame_t *frame, xlator_t *this, inode_t *inode)
50dc83
 
50dc83
     local->call_count = call_count;
50dc83
     i = 1;
50dc83
+    xdata_req = dict_new();
50dc83
+    if (!xdata_req) {
50dc83
+        shard_common_failure_unwind(local->fop, frame, -1, ENOMEM);
50dc83
+        return 0;
50dc83
+    }
50dc83
+    ret = dict_set_uint64(xdata_req, GF_GET_FILE_BLOCK_COUNT, 8 * 8);
50dc83
+    if (ret) {
50dc83
+        gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED,
50dc83
+               "Failed to set key %s into dict during truncate of %s",
50dc83
+               GF_GET_FILE_BLOCK_COUNT,
50dc83
+               uuid_utoa(local->resolver_base_inode->gfid));
50dc83
+        dict_unref(xdata_req);
50dc83
+        shard_common_failure_unwind(local->fop, frame, -1, ENOMEM);
50dc83
+        return 0;
50dc83
+    }
50dc83
 
50dc83
     SHARD_SET_ROOT_FS_ID(frame, local);
50dc83
     while (cur_block <= last_block) {
50dc83
@@ -2148,7 +2179,7 @@ shard_truncate_htol(call_frame_t *frame, xlator_t *this, inode_t *inode)
50dc83
 
50dc83
         STACK_WIND_COOKIE(frame, shard_truncate_htol_cbk,
50dc83
                           (void *)(long)cur_block, FIRST_CHILD(this),
50dc83
-                          FIRST_CHILD(this)->fops->unlink, &loc, 0, NULL);
50dc83
+                          FIRST_CHILD(this)->fops->unlink, &loc, 0, xdata_req);
50dc83
         loc_wipe(&loc;;
50dc83
     next:
50dc83
         i++;
50dc83
@@ -2156,6 +2187,7 @@ shard_truncate_htol(call_frame_t *frame, xlator_t *this, inode_t *inode)
50dc83
         if (!--call_count)
50dc83
             break;
50dc83
     }
50dc83
+    dict_unref(xdata_req);
50dc83
     return 0;
50dc83
 }
50dc83
 
50dc83
@@ -2608,7 +2640,7 @@ shard_post_lookup_truncate_handler(call_frame_t *frame, xlator_t *this)
50dc83
          */
50dc83
         local->hole_size = local->offset - local->prebuf.ia_size;
50dc83
         local->delta_size = 0;
50dc83
-        local->delta_blocks = 0;
50dc83
+        GF_ATOMIC_INIT(local->delta_blocks, 0);
50dc83
         local->postbuf.ia_size = local->offset;
50dc83
         tmp_stbuf.ia_size = local->offset;
50dc83
         shard_inode_ctx_set(local->loc.inode, this, &tmp_stbuf, 0,
50dc83
@@ -2624,7 +2656,7 @@ shard_post_lookup_truncate_handler(call_frame_t *frame, xlator_t *this)
50dc83
          */
50dc83
         local->hole_size = 0;
50dc83
         local->delta_size = (local->offset - local->prebuf.ia_size);
50dc83
-        local->delta_blocks = 0;
50dc83
+        GF_ATOMIC_INIT(local->delta_blocks, 0);
50dc83
         tmp_stbuf.ia_size = local->offset;
50dc83
         shard_inode_ctx_set(local->loc.inode, this, &tmp_stbuf, 0,
50dc83
                             SHARD_INODE_WRITE_MASK);
50dc83
@@ -2680,6 +2712,7 @@ shard_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
50dc83
     if (!local->xattr_req)
50dc83
         goto err;
50dc83
     local->resolver_base_inode = loc->inode;
50dc83
+    GF_ATOMIC_INIT(local->delta_blocks, 0);
50dc83
 
50dc83
     shard_lookup_base_file(frame, this, &local->loc,
50dc83
                            shard_post_lookup_truncate_handler);
50dc83
@@ -2735,6 +2768,7 @@ shard_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
50dc83
     local->loc.inode = inode_ref(fd->inode);
50dc83
     gf_uuid_copy(local->loc.gfid, fd->inode->gfid);
50dc83
     local->resolver_base_inode = fd->inode;
50dc83
+    GF_ATOMIC_INIT(local->delta_blocks, 0);
50dc83
 
50dc83
     shard_lookup_base_file(frame, this, &local->loc,
50dc83
                            shard_post_lookup_truncate_handler);
50dc83
@@ -5295,7 +5329,8 @@ shard_common_inode_write_do_cbk(call_frame_t *frame, void *cookie,
50dc83
             local->op_errno = op_errno;
50dc83
         } else {
50dc83
             local->written_size += op_ret;
50dc83
-            local->delta_blocks += (post->ia_blocks - pre->ia_blocks);
50dc83
+            GF_ATOMIC_ADD(local->delta_blocks,
50dc83
+                          post->ia_blocks - pre->ia_blocks);
50dc83
             local->delta_size += (post->ia_size - pre->ia_size);
50dc83
             shard_inode_ctx_set(local->fd->inode, this, post, 0,
50dc83
                                 SHARD_MASK_TIMES);
50dc83
@@ -6599,6 +6634,7 @@ shard_common_inode_write_begin(call_frame_t *frame, xlator_t *this,
50dc83
     local->fd = fd_ref(fd);
50dc83
     local->block_size = block_size;
50dc83
     local->resolver_base_inode = local->fd->inode;
50dc83
+    GF_ATOMIC_INIT(local->delta_blocks, 0);
50dc83
 
50dc83
     local->loc.inode = inode_ref(fd->inode);
50dc83
     gf_uuid_copy(local->loc.gfid, fd->inode->gfid);
50dc83
diff --git a/xlators/features/shard/src/shard.h b/xlators/features/shard/src/shard.h
50dc83
index cd6a663..04abd62 100644
50dc83
--- a/xlators/features/shard/src/shard.h
50dc83
+++ b/xlators/features/shard/src/shard.h
50dc83
@@ -275,7 +275,7 @@ typedef struct shard_local {
50dc83
     size_t req_size;
50dc83
     size_t readdir_size;
50dc83
     int64_t delta_size;
50dc83
-    int64_t delta_blocks;
50dc83
+    gf_atomic_t delta_blocks;
50dc83
     loc_t loc;
50dc83
     loc_t dot_shard_loc;
50dc83
     loc_t dot_shard_rm_loc;
50dc83
diff --git a/xlators/storage/posix/src/posix-entry-ops.c b/xlators/storage/posix/src/posix-entry-ops.c
50dc83
index b24a052..34ee2b8 100644
50dc83
--- a/xlators/storage/posix/src/posix-entry-ops.c
50dc83
+++ b/xlators/storage/posix/src/posix-entry-ops.c
50dc83
@@ -1071,6 +1071,7 @@ posix_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
50dc83
     char *real_path = NULL;
50dc83
     char *par_path = NULL;
50dc83
     int32_t fd = -1;
50dc83
+    int ret = -1;
50dc83
     struct iatt stbuf = {
50dc83
         0,
50dc83
     };
50dc83
@@ -1235,6 +1236,14 @@ posix_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
50dc83
         goto out;
50dc83
     }
50dc83
 
50dc83
+    if (xdata && dict_get(xdata, GF_GET_FILE_BLOCK_COUNT)) {
50dc83
+        ret = dict_set_uint64(unwind_dict, GF_GET_FILE_BLOCK_COUNT,
50dc83
+                              stbuf.ia_blocks);
50dc83
+        if (ret)
50dc83
+            gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_SET_XDATA_FAIL,
50dc83
+                   "Failed to set %s in rsp dict", GF_GET_FILE_BLOCK_COUNT);
50dc83
+    }
50dc83
+
50dc83
     if (xdata && dict_get(xdata, GET_LINK_COUNT))
50dc83
         get_link_count = _gf_true;
50dc83
     op_ret = posix_unlink_gfid_handle_and_entry(frame, this, real_path, &stbuf,
50dc83
-- 
50dc83
1.8.3.1
50dc83