e3c68b
From 369c5772a722b6e346ec8b41f992112785366778 Mon Sep 17 00:00:00 2001
e3c68b
From: Krutika Dhananjay <kdhananj@redhat.com>
e3c68b
Date: Wed, 8 May 2019 13:00:51 +0530
e3c68b
Subject: [PATCH 189/192] features/shard: Fix block-count accounting upon
e3c68b
 truncate to lower size
e3c68b
e3c68b
    > Upstream: https://review.gluster.org/22681
e3c68b
    > BUG: 1705884
e3c68b
    > Change-Id: I9128a192e9bf8c3c3a959e96b7400879d03d7c53
e3c68b
e3c68b
The way delta_blocks is computed in shard is incorrect, when a file
e3c68b
is truncated to a lower size. The accounting only considers change
e3c68b
in size of the last of the truncated shards.
e3c68b
e3c68b
FIX:
e3c68b
e3c68b
Get the block-count of each shard just before an unlink at posix in
e3c68b
xdata.  Their summation plus the change in size of last shard
e3c68b
(from an actual truncate) is used to compute delta_blocks which is
e3c68b
used in the xattrop for size update.
e3c68b
e3c68b
Change-Id: I9128a192e9bf8c3c3a959e96b7400879d03d7c53
e3c68b
updates: bz#1668001
e3c68b
Signed-off-by: Krutika Dhananjay <kdhananj@redhat.com>
e3c68b
Reviewed-on: https://code.engineering.redhat.com/gerrit/173477
e3c68b
Tested-by: RHGS Build Bot <nigelb@redhat.com>
e3c68b
Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
e3c68b
---
e3c68b
 libglusterfs/src/glusterfs/glusterfs.h      |  2 +
e3c68b
 tests/bugs/shard/bug-1705884.t              | 32 +++++++++++++++
e3c68b
 xlators/features/shard/src/shard.c          | 60 +++++++++++++++++++++++------
e3c68b
 xlators/features/shard/src/shard.h          |  2 +-
e3c68b
 xlators/storage/posix/src/posix-entry-ops.c |  9 +++++
e3c68b
 5 files changed, 92 insertions(+), 13 deletions(-)
e3c68b
 create mode 100644 tests/bugs/shard/bug-1705884.t
e3c68b
e3c68b
diff --git a/libglusterfs/src/glusterfs/glusterfs.h b/libglusterfs/src/glusterfs/glusterfs.h
e3c68b
index 516b497..9ec2365 100644
e3c68b
--- a/libglusterfs/src/glusterfs/glusterfs.h
e3c68b
+++ b/libglusterfs/src/glusterfs/glusterfs.h
e3c68b
@@ -328,6 +328,8 @@ enum gf_internal_fop_indicator {
e3c68b
 #define GF_RESPONSE_LINK_COUNT_XDATA "gf_response_link_count"
e3c68b
 #define GF_REQUEST_LINK_COUNT_XDATA "gf_request_link_count"
e3c68b
 
e3c68b
+#define GF_GET_FILE_BLOCK_COUNT "gf_get_file_block_count"
e3c68b
+
e3c68b
 #define CTR_ATTACH_TIER_LOOKUP "ctr_attach_tier_lookup"
e3c68b
 
e3c68b
 #define CLIENT_CMD_CONNECT "trusted.glusterfs.client-connect"
e3c68b
diff --git a/tests/bugs/shard/bug-1705884.t b/tests/bugs/shard/bug-1705884.t
e3c68b
new file mode 100644
e3c68b
index 0000000..f6e5037
e3c68b
--- /dev/null
e3c68b
+++ b/tests/bugs/shard/bug-1705884.t
e3c68b
@@ -0,0 +1,32 @@
e3c68b
+#!/bin/bash
e3c68b
+
e3c68b
+. $(dirname $0)/../../include.rc
e3c68b
+. $(dirname $0)/../../volume.rc
e3c68b
+. $(dirname $0)/../../fallocate.rc
e3c68b
+
e3c68b
+cleanup
e3c68b
+
e3c68b
+require_fallocate -l 1m $M0/file
e3c68b
+
e3c68b
+TEST glusterd
e3c68b
+TEST pidof glusterd
e3c68b
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}
e3c68b
+TEST $CLI volume set $V0 features.shard on
e3c68b
+TEST $CLI volume set $V0 performance.write-behind off
e3c68b
+TEST $CLI volume set $V0 performance.stat-prefetch off
e3c68b
+TEST $CLI volume start $V0
e3c68b
+
e3c68b
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0
e3c68b
+
e3c68b
+TEST fallocate -l 200M $M0/foo
e3c68b
+EXPECT `echo "$(( ( 200 * 1024 * 1024 ) / 512 ))"`  stat -c %b $M0/foo
e3c68b
+TEST truncate -s 0 $M0/foo
e3c68b
+EXPECT "0" stat -c %b $M0/foo
e3c68b
+TEST fallocate -l 100M $M0/foo
e3c68b
+EXPECT `echo "$(( ( 100 * 1024 * 1024 ) / 512 ))"`  stat -c %b $M0/foo
e3c68b
+
e3c68b
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
e3c68b
+TEST $CLI volume stop $V0
e3c68b
+TEST $CLI volume delete $V0
e3c68b
+
e3c68b
+cleanup
e3c68b
diff --git a/xlators/features/shard/src/shard.c b/xlators/features/shard/src/shard.c
e3c68b
index c1799ad..b248767 100644
e3c68b
--- a/xlators/features/shard/src/shard.c
e3c68b
+++ b/xlators/features/shard/src/shard.c
e3c68b
@@ -1135,6 +1135,7 @@ shard_update_file_size(call_frame_t *frame, xlator_t *this, fd_t *fd,
e3c68b
 {
e3c68b
     int ret = -1;
e3c68b
     int64_t *size_attr = NULL;
e3c68b
+    int64_t delta_blocks = 0;
e3c68b
     inode_t *inode = NULL;
e3c68b
     shard_local_t *local = NULL;
e3c68b
     dict_t *xattr_req = NULL;
e3c68b
@@ -1156,13 +1157,13 @@ shard_update_file_size(call_frame_t *frame, xlator_t *this, fd_t *fd,
e3c68b
 
e3c68b
     /* If both size and block count have not changed, then skip the xattrop.
e3c68b
      */
e3c68b
-    if ((local->delta_size + local->hole_size == 0) &&
e3c68b
-        (local->delta_blocks == 0)) {
e3c68b
+    delta_blocks = GF_ATOMIC_GET(local->delta_blocks);
e3c68b
+    if ((local->delta_size + local->hole_size == 0) && (delta_blocks == 0)) {
e3c68b
         goto out;
e3c68b
     }
e3c68b
 
e3c68b
     ret = shard_set_size_attrs(local->delta_size + local->hole_size,
e3c68b
-                               local->delta_blocks, &size_attr);
e3c68b
+                               delta_blocks, &size_attr);
e3c68b
     if (ret) {
e3c68b
         gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_SIZE_SET_FAILED,
e3c68b
                "Failed to set size attrs for %s", uuid_utoa(inode->gfid));
e3c68b
@@ -1947,6 +1948,7 @@ shard_truncate_last_shard_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
e3c68b
                               dict_t *xdata)
e3c68b
 {
e3c68b
     inode_t *inode = NULL;
e3c68b
+    int64_t delta_blocks = 0;
e3c68b
     shard_local_t *local = NULL;
e3c68b
 
e3c68b
     local = frame->local;
e3c68b
@@ -1967,14 +1969,15 @@ shard_truncate_last_shard_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
e3c68b
     }
e3c68b
 
e3c68b
     local->postbuf.ia_size = local->offset;
e3c68b
-    local->postbuf.ia_blocks -= (prebuf->ia_blocks - postbuf->ia_blocks);
e3c68b
     /* Let the delta be negative. We want xattrop to do subtraction */
e3c68b
     local->delta_size = local->postbuf.ia_size - local->prebuf.ia_size;
e3c68b
-    local->delta_blocks = postbuf->ia_blocks - prebuf->ia_blocks;
e3c68b
+    delta_blocks = GF_ATOMIC_ADD(local->delta_blocks,
e3c68b
+                                 postbuf->ia_blocks - prebuf->ia_blocks);
e3c68b
+    GF_ASSERT(delta_blocks <= 0);
e3c68b
+    local->postbuf.ia_blocks += delta_blocks;
e3c68b
     local->hole_size = 0;
e3c68b
 
e3c68b
-    shard_inode_ctx_set(inode, this, postbuf, 0, SHARD_MASK_TIMES);
e3c68b
-
e3c68b
+    shard_inode_ctx_set(inode, this, &local->postbuf, 0, SHARD_MASK_TIMES);
e3c68b
     shard_update_file_size(frame, this, NULL, &local->loc,
e3c68b
                            shard_post_update_size_truncate_handler);
e3c68b
     return 0;
e3c68b
@@ -2034,8 +2037,10 @@ shard_truncate_htol_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
e3c68b
                         struct iatt *preparent, struct iatt *postparent,
e3c68b
                         dict_t *xdata)
e3c68b
 {
e3c68b
+    int ret = 0;
e3c68b
     int call_count = 0;
e3c68b
     int shard_block_num = (long)cookie;
e3c68b
+    uint64_t block_count = 0;
e3c68b
     shard_local_t *local = NULL;
e3c68b
 
e3c68b
     local = frame->local;
e3c68b
@@ -2045,6 +2050,16 @@ shard_truncate_htol_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
e3c68b
         local->op_errno = op_errno;
e3c68b
         goto done;
e3c68b
     }
e3c68b
+    ret = dict_get_uint64(xdata, GF_GET_FILE_BLOCK_COUNT, &block_count);
e3c68b
+    if (!ret) {
e3c68b
+        GF_ATOMIC_SUB(local->delta_blocks, block_count);
e3c68b
+    } else {
e3c68b
+        /* dict_get failed possibly due to a heterogeneous cluster? */
e3c68b
+        gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED,
e3c68b
+               "Failed to get key %s from dict during truncate of gfid %s",
e3c68b
+               GF_GET_FILE_BLOCK_COUNT,
e3c68b
+               uuid_utoa(local->resolver_base_inode->gfid));
e3c68b
+    }
e3c68b
 
e3c68b
     shard_unlink_block_inode(local, shard_block_num);
e3c68b
 done:
e3c68b
@@ -2074,6 +2089,7 @@ shard_truncate_htol(call_frame_t *frame, xlator_t *this, inode_t *inode)
e3c68b
     gf_boolean_t wind_failed = _gf_false;
e3c68b
     shard_local_t *local = NULL;
e3c68b
     shard_priv_t *priv = NULL;
e3c68b
+    dict_t *xdata_req = NULL;
e3c68b
 
e3c68b
     local = frame->local;
e3c68b
     priv = this->private;
e3c68b
@@ -2101,7 +2117,7 @@ shard_truncate_htol(call_frame_t *frame, xlator_t *this, inode_t *inode)
e3c68b
         local->postbuf.ia_size = local->offset;
e3c68b
         local->postbuf.ia_blocks = local->prebuf.ia_blocks;
e3c68b
         local->delta_size = local->postbuf.ia_size - local->prebuf.ia_size;
e3c68b
-        local->delta_blocks = 0;
e3c68b
+        GF_ATOMIC_INIT(local->delta_blocks, 0);
e3c68b
         local->hole_size = 0;
e3c68b
         shard_update_file_size(frame, this, local->fd, &local->loc,
e3c68b
                                shard_post_update_size_truncate_handler);
e3c68b
@@ -2110,6 +2126,21 @@ shard_truncate_htol(call_frame_t *frame, xlator_t *this, inode_t *inode)
e3c68b
 
e3c68b
     local->call_count = call_count;
e3c68b
     i = 1;
e3c68b
+    xdata_req = dict_new();
e3c68b
+    if (!xdata_req) {
e3c68b
+        shard_common_failure_unwind(local->fop, frame, -1, ENOMEM);
e3c68b
+        return 0;
e3c68b
+    }
e3c68b
+    ret = dict_set_uint64(xdata_req, GF_GET_FILE_BLOCK_COUNT, 8 * 8);
e3c68b
+    if (ret) {
e3c68b
+        gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED,
e3c68b
+               "Failed to set key %s into dict during truncate of %s",
e3c68b
+               GF_GET_FILE_BLOCK_COUNT,
e3c68b
+               uuid_utoa(local->resolver_base_inode->gfid));
e3c68b
+        dict_unref(xdata_req);
e3c68b
+        shard_common_failure_unwind(local->fop, frame, -1, ENOMEM);
e3c68b
+        return 0;
e3c68b
+    }
e3c68b
 
e3c68b
     SHARD_SET_ROOT_FS_ID(frame, local);
e3c68b
     while (cur_block <= last_block) {
e3c68b
@@ -2148,7 +2179,7 @@ shard_truncate_htol(call_frame_t *frame, xlator_t *this, inode_t *inode)
e3c68b
 
e3c68b
         STACK_WIND_COOKIE(frame, shard_truncate_htol_cbk,
e3c68b
                           (void *)(long)cur_block, FIRST_CHILD(this),
e3c68b
-                          FIRST_CHILD(this)->fops->unlink, &loc, 0, NULL);
e3c68b
+                          FIRST_CHILD(this)->fops->unlink, &loc, 0, xdata_req);
e3c68b
         loc_wipe(&loc;;
e3c68b
     next:
e3c68b
         i++;
e3c68b
@@ -2156,6 +2187,7 @@ shard_truncate_htol(call_frame_t *frame, xlator_t *this, inode_t *inode)
e3c68b
         if (!--call_count)
e3c68b
             break;
e3c68b
     }
e3c68b
+    dict_unref(xdata_req);
e3c68b
     return 0;
e3c68b
 }
e3c68b
 
e3c68b
@@ -2608,7 +2640,7 @@ shard_post_lookup_truncate_handler(call_frame_t *frame, xlator_t *this)
e3c68b
          */
e3c68b
         local->hole_size = local->offset - local->prebuf.ia_size;
e3c68b
         local->delta_size = 0;
e3c68b
-        local->delta_blocks = 0;
e3c68b
+        GF_ATOMIC_INIT(local->delta_blocks, 0);
e3c68b
         local->postbuf.ia_size = local->offset;
e3c68b
         tmp_stbuf.ia_size = local->offset;
e3c68b
         shard_inode_ctx_set(local->loc.inode, this, &tmp_stbuf, 0,
e3c68b
@@ -2624,7 +2656,7 @@ shard_post_lookup_truncate_handler(call_frame_t *frame, xlator_t *this)
e3c68b
          */
e3c68b
         local->hole_size = 0;
e3c68b
         local->delta_size = (local->offset - local->prebuf.ia_size);
e3c68b
-        local->delta_blocks = 0;
e3c68b
+        GF_ATOMIC_INIT(local->delta_blocks, 0);
e3c68b
         tmp_stbuf.ia_size = local->offset;
e3c68b
         shard_inode_ctx_set(local->loc.inode, this, &tmp_stbuf, 0,
e3c68b
                             SHARD_INODE_WRITE_MASK);
e3c68b
@@ -2680,6 +2712,7 @@ shard_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
e3c68b
     if (!local->xattr_req)
e3c68b
         goto err;
e3c68b
     local->resolver_base_inode = loc->inode;
e3c68b
+    GF_ATOMIC_INIT(local->delta_blocks, 0);
e3c68b
 
e3c68b
     shard_lookup_base_file(frame, this, &local->loc,
e3c68b
                            shard_post_lookup_truncate_handler);
e3c68b
@@ -2735,6 +2768,7 @@ shard_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
e3c68b
     local->loc.inode = inode_ref(fd->inode);
e3c68b
     gf_uuid_copy(local->loc.gfid, fd->inode->gfid);
e3c68b
     local->resolver_base_inode = fd->inode;
e3c68b
+    GF_ATOMIC_INIT(local->delta_blocks, 0);
e3c68b
 
e3c68b
     shard_lookup_base_file(frame, this, &local->loc,
e3c68b
                            shard_post_lookup_truncate_handler);
e3c68b
@@ -5295,7 +5329,8 @@ shard_common_inode_write_do_cbk(call_frame_t *frame, void *cookie,
e3c68b
             local->op_errno = op_errno;
e3c68b
         } else {
e3c68b
             local->written_size += op_ret;
e3c68b
-            local->delta_blocks += (post->ia_blocks - pre->ia_blocks);
e3c68b
+            GF_ATOMIC_ADD(local->delta_blocks,
e3c68b
+                          post->ia_blocks - pre->ia_blocks);
e3c68b
             local->delta_size += (post->ia_size - pre->ia_size);
e3c68b
             shard_inode_ctx_set(local->fd->inode, this, post, 0,
e3c68b
                                 SHARD_MASK_TIMES);
e3c68b
@@ -6599,6 +6634,7 @@ shard_common_inode_write_begin(call_frame_t *frame, xlator_t *this,
e3c68b
     local->fd = fd_ref(fd);
e3c68b
     local->block_size = block_size;
e3c68b
     local->resolver_base_inode = local->fd->inode;
e3c68b
+    GF_ATOMIC_INIT(local->delta_blocks, 0);
e3c68b
 
e3c68b
     local->loc.inode = inode_ref(fd->inode);
e3c68b
     gf_uuid_copy(local->loc.gfid, fd->inode->gfid);
e3c68b
diff --git a/xlators/features/shard/src/shard.h b/xlators/features/shard/src/shard.h
e3c68b
index cd6a663..04abd62 100644
e3c68b
--- a/xlators/features/shard/src/shard.h
e3c68b
+++ b/xlators/features/shard/src/shard.h
e3c68b
@@ -275,7 +275,7 @@ typedef struct shard_local {
e3c68b
     size_t req_size;
e3c68b
     size_t readdir_size;
e3c68b
     int64_t delta_size;
e3c68b
-    int64_t delta_blocks;
e3c68b
+    gf_atomic_t delta_blocks;
e3c68b
     loc_t loc;
e3c68b
     loc_t dot_shard_loc;
e3c68b
     loc_t dot_shard_rm_loc;
e3c68b
diff --git a/xlators/storage/posix/src/posix-entry-ops.c b/xlators/storage/posix/src/posix-entry-ops.c
e3c68b
index b24a052..34ee2b8 100644
e3c68b
--- a/xlators/storage/posix/src/posix-entry-ops.c
e3c68b
+++ b/xlators/storage/posix/src/posix-entry-ops.c
e3c68b
@@ -1071,6 +1071,7 @@ posix_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
e3c68b
     char *real_path = NULL;
e3c68b
     char *par_path = NULL;
e3c68b
     int32_t fd = -1;
e3c68b
+    int ret = -1;
e3c68b
     struct iatt stbuf = {
e3c68b
         0,
e3c68b
     };
e3c68b
@@ -1235,6 +1236,14 @@ posix_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
e3c68b
         goto out;
e3c68b
     }
e3c68b
 
e3c68b
+    if (xdata && dict_get(xdata, GF_GET_FILE_BLOCK_COUNT)) {
e3c68b
+        ret = dict_set_uint64(unwind_dict, GF_GET_FILE_BLOCK_COUNT,
e3c68b
+                              stbuf.ia_blocks);
e3c68b
+        if (ret)
e3c68b
+            gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_SET_XDATA_FAIL,
e3c68b
+                   "Failed to set %s in rsp dict", GF_GET_FILE_BLOCK_COUNT);
e3c68b
+    }
e3c68b
+
e3c68b
     if (xdata && dict_get(xdata, GET_LINK_COUNT))
e3c68b
         get_link_count = _gf_true;
e3c68b
     op_ret = posix_unlink_gfid_handle_and_entry(frame, this, real_path, &stbuf,
e3c68b
-- 
e3c68b
1.8.3.1
e3c68b