74b1de
From 369c5772a722b6e346ec8b41f992112785366778 Mon Sep 17 00:00:00 2001
74b1de
From: Krutika Dhananjay <kdhananj@redhat.com>
74b1de
Date: Wed, 8 May 2019 13:00:51 +0530
74b1de
Subject: [PATCH 189/192] features/shard: Fix block-count accounting upon
74b1de
 truncate to lower size
74b1de
74b1de
    > Upstream: https://review.gluster.org/22681
74b1de
    > BUG: 1705884
74b1de
    > Change-Id: I9128a192e9bf8c3c3a959e96b7400879d03d7c53
74b1de
74b1de
The way delta_blocks is computed in shard is incorrect, when a file
74b1de
is truncated to a lower size. The accounting only considers change
74b1de
in size of the last of the truncated shards.
74b1de
74b1de
FIX:
74b1de
74b1de
Get the block-count of each shard just before an unlink at posix in
74b1de
xdata.  Their summation plus the change in size of last shard
74b1de
(from an actual truncate) is used to compute delta_blocks which is
74b1de
used in the xattrop for size update.
74b1de
74b1de
Change-Id: I9128a192e9bf8c3c3a959e96b7400879d03d7c53
74b1de
updates: bz#1668001
74b1de
Signed-off-by: Krutika Dhananjay <kdhananj@redhat.com>
74b1de
Reviewed-on: https://code.engineering.redhat.com/gerrit/173477
74b1de
Tested-by: RHGS Build Bot <nigelb@redhat.com>
74b1de
Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
74b1de
---
74b1de
 libglusterfs/src/glusterfs/glusterfs.h      |  2 +
74b1de
 tests/bugs/shard/bug-1705884.t              | 32 +++++++++++++++
74b1de
 xlators/features/shard/src/shard.c          | 60 +++++++++++++++++++++++------
74b1de
 xlators/features/shard/src/shard.h          |  2 +-
74b1de
 xlators/storage/posix/src/posix-entry-ops.c |  9 +++++
74b1de
 5 files changed, 92 insertions(+), 13 deletions(-)
74b1de
 create mode 100644 tests/bugs/shard/bug-1705884.t
74b1de
74b1de
diff --git a/libglusterfs/src/glusterfs/glusterfs.h b/libglusterfs/src/glusterfs/glusterfs.h
74b1de
index 516b497..9ec2365 100644
74b1de
--- a/libglusterfs/src/glusterfs/glusterfs.h
74b1de
+++ b/libglusterfs/src/glusterfs/glusterfs.h
74b1de
@@ -328,6 +328,8 @@ enum gf_internal_fop_indicator {
74b1de
 #define GF_RESPONSE_LINK_COUNT_XDATA "gf_response_link_count"
74b1de
 #define GF_REQUEST_LINK_COUNT_XDATA "gf_request_link_count"
74b1de
 
74b1de
+#define GF_GET_FILE_BLOCK_COUNT "gf_get_file_block_count"
74b1de
+
74b1de
 #define CTR_ATTACH_TIER_LOOKUP "ctr_attach_tier_lookup"
74b1de
 
74b1de
 #define CLIENT_CMD_CONNECT "trusted.glusterfs.client-connect"
74b1de
diff --git a/tests/bugs/shard/bug-1705884.t b/tests/bugs/shard/bug-1705884.t
74b1de
new file mode 100644
74b1de
index 0000000..f6e5037
74b1de
--- /dev/null
74b1de
+++ b/tests/bugs/shard/bug-1705884.t
74b1de
@@ -0,0 +1,32 @@
74b1de
+#!/bin/bash
74b1de
+
74b1de
+. $(dirname $0)/../../include.rc
74b1de
+. $(dirname $0)/../../volume.rc
74b1de
+. $(dirname $0)/../../fallocate.rc
74b1de
+
74b1de
+cleanup
74b1de
+
74b1de
+require_fallocate -l 1m $M0/file
74b1de
+
74b1de
+TEST glusterd
74b1de
+TEST pidof glusterd
74b1de
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}
74b1de
+TEST $CLI volume set $V0 features.shard on
74b1de
+TEST $CLI volume set $V0 performance.write-behind off
74b1de
+TEST $CLI volume set $V0 performance.stat-prefetch off
74b1de
+TEST $CLI volume start $V0
74b1de
+
74b1de
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0
74b1de
+
74b1de
+TEST fallocate -l 200M $M0/foo
74b1de
+EXPECT `echo "$(( ( 200 * 1024 * 1024 ) / 512 ))"`  stat -c %b $M0/foo
74b1de
+TEST truncate -s 0 $M0/foo
74b1de
+EXPECT "0" stat -c %b $M0/foo
74b1de
+TEST fallocate -l 100M $M0/foo
74b1de
+EXPECT `echo "$(( ( 100 * 1024 * 1024 ) / 512 ))"`  stat -c %b $M0/foo
74b1de
+
74b1de
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
74b1de
+TEST $CLI volume stop $V0
74b1de
+TEST $CLI volume delete $V0
74b1de
+
74b1de
+cleanup
74b1de
diff --git a/xlators/features/shard/src/shard.c b/xlators/features/shard/src/shard.c
74b1de
index c1799ad..b248767 100644
74b1de
--- a/xlators/features/shard/src/shard.c
74b1de
+++ b/xlators/features/shard/src/shard.c
74b1de
@@ -1135,6 +1135,7 @@ shard_update_file_size(call_frame_t *frame, xlator_t *this, fd_t *fd,
74b1de
 {
74b1de
     int ret = -1;
74b1de
     int64_t *size_attr = NULL;
74b1de
+    int64_t delta_blocks = 0;
74b1de
     inode_t *inode = NULL;
74b1de
     shard_local_t *local = NULL;
74b1de
     dict_t *xattr_req = NULL;
74b1de
@@ -1156,13 +1157,13 @@ shard_update_file_size(call_frame_t *frame, xlator_t *this, fd_t *fd,
74b1de
 
74b1de
     /* If both size and block count have not changed, then skip the xattrop.
74b1de
      */
74b1de
-    if ((local->delta_size + local->hole_size == 0) &&
74b1de
-        (local->delta_blocks == 0)) {
74b1de
+    delta_blocks = GF_ATOMIC_GET(local->delta_blocks);
74b1de
+    if ((local->delta_size + local->hole_size == 0) && (delta_blocks == 0)) {
74b1de
         goto out;
74b1de
     }
74b1de
 
74b1de
     ret = shard_set_size_attrs(local->delta_size + local->hole_size,
74b1de
-                               local->delta_blocks, &size_attr);
74b1de
+                               delta_blocks, &size_attr);
74b1de
     if (ret) {
74b1de
         gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_SIZE_SET_FAILED,
74b1de
                "Failed to set size attrs for %s", uuid_utoa(inode->gfid));
74b1de
@@ -1947,6 +1948,7 @@ shard_truncate_last_shard_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
74b1de
                               dict_t *xdata)
74b1de
 {
74b1de
     inode_t *inode = NULL;
74b1de
+    int64_t delta_blocks = 0;
74b1de
     shard_local_t *local = NULL;
74b1de
 
74b1de
     local = frame->local;
74b1de
@@ -1967,14 +1969,15 @@ shard_truncate_last_shard_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
74b1de
     }
74b1de
 
74b1de
     local->postbuf.ia_size = local->offset;
74b1de
-    local->postbuf.ia_blocks -= (prebuf->ia_blocks - postbuf->ia_blocks);
74b1de
     /* Let the delta be negative. We want xattrop to do subtraction */
74b1de
     local->delta_size = local->postbuf.ia_size - local->prebuf.ia_size;
74b1de
-    local->delta_blocks = postbuf->ia_blocks - prebuf->ia_blocks;
74b1de
+    delta_blocks = GF_ATOMIC_ADD(local->delta_blocks,
74b1de
+                                 postbuf->ia_blocks - prebuf->ia_blocks);
74b1de
+    GF_ASSERT(delta_blocks <= 0);
74b1de
+    local->postbuf.ia_blocks += delta_blocks;
74b1de
     local->hole_size = 0;
74b1de
 
74b1de
-    shard_inode_ctx_set(inode, this, postbuf, 0, SHARD_MASK_TIMES);
74b1de
-
74b1de
+    shard_inode_ctx_set(inode, this, &local->postbuf, 0, SHARD_MASK_TIMES);
74b1de
     shard_update_file_size(frame, this, NULL, &local->loc,
74b1de
                            shard_post_update_size_truncate_handler);
74b1de
     return 0;
74b1de
@@ -2034,8 +2037,10 @@ shard_truncate_htol_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
74b1de
                         struct iatt *preparent, struct iatt *postparent,
74b1de
                         dict_t *xdata)
74b1de
 {
74b1de
+    int ret = 0;
74b1de
     int call_count = 0;
74b1de
     int shard_block_num = (long)cookie;
74b1de
+    uint64_t block_count = 0;
74b1de
     shard_local_t *local = NULL;
74b1de
 
74b1de
     local = frame->local;
74b1de
@@ -2045,6 +2050,16 @@ shard_truncate_htol_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
74b1de
         local->op_errno = op_errno;
74b1de
         goto done;
74b1de
     }
74b1de
+    ret = dict_get_uint64(xdata, GF_GET_FILE_BLOCK_COUNT, &block_count);
74b1de
+    if (!ret) {
74b1de
+        GF_ATOMIC_SUB(local->delta_blocks, block_count);
74b1de
+    } else {
74b1de
+        /* dict_get failed possibly due to a heterogeneous cluster? */
74b1de
+        gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED,
74b1de
+               "Failed to get key %s from dict during truncate of gfid %s",
74b1de
+               GF_GET_FILE_BLOCK_COUNT,
74b1de
+               uuid_utoa(local->resolver_base_inode->gfid));
74b1de
+    }
74b1de
 
74b1de
     shard_unlink_block_inode(local, shard_block_num);
74b1de
 done:
74b1de
@@ -2074,6 +2089,7 @@ shard_truncate_htol(call_frame_t *frame, xlator_t *this, inode_t *inode)
74b1de
     gf_boolean_t wind_failed = _gf_false;
74b1de
     shard_local_t *local = NULL;
74b1de
     shard_priv_t *priv = NULL;
74b1de
+    dict_t *xdata_req = NULL;
74b1de
 
74b1de
     local = frame->local;
74b1de
     priv = this->private;
74b1de
@@ -2101,7 +2117,7 @@ shard_truncate_htol(call_frame_t *frame, xlator_t *this, inode_t *inode)
74b1de
         local->postbuf.ia_size = local->offset;
74b1de
         local->postbuf.ia_blocks = local->prebuf.ia_blocks;
74b1de
         local->delta_size = local->postbuf.ia_size - local->prebuf.ia_size;
74b1de
-        local->delta_blocks = 0;
74b1de
+        GF_ATOMIC_INIT(local->delta_blocks, 0);
74b1de
         local->hole_size = 0;
74b1de
         shard_update_file_size(frame, this, local->fd, &local->loc,
74b1de
                                shard_post_update_size_truncate_handler);
74b1de
@@ -2110,6 +2126,21 @@ shard_truncate_htol(call_frame_t *frame, xlator_t *this, inode_t *inode)
74b1de
 
74b1de
     local->call_count = call_count;
74b1de
     i = 1;
74b1de
+    xdata_req = dict_new();
74b1de
+    if (!xdata_req) {
74b1de
+        shard_common_failure_unwind(local->fop, frame, -1, ENOMEM);
74b1de
+        return 0;
74b1de
+    }
74b1de
+    ret = dict_set_uint64(xdata_req, GF_GET_FILE_BLOCK_COUNT, 8 * 8);
74b1de
+    if (ret) {
74b1de
+        gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED,
74b1de
+               "Failed to set key %s into dict during truncate of %s",
74b1de
+               GF_GET_FILE_BLOCK_COUNT,
74b1de
+               uuid_utoa(local->resolver_base_inode->gfid));
74b1de
+        dict_unref(xdata_req);
74b1de
+        shard_common_failure_unwind(local->fop, frame, -1, ENOMEM);
74b1de
+        return 0;
74b1de
+    }
74b1de
 
74b1de
     SHARD_SET_ROOT_FS_ID(frame, local);
74b1de
     while (cur_block <= last_block) {
74b1de
@@ -2148,7 +2179,7 @@ shard_truncate_htol(call_frame_t *frame, xlator_t *this, inode_t *inode)
74b1de
 
74b1de
         STACK_WIND_COOKIE(frame, shard_truncate_htol_cbk,
74b1de
                           (void *)(long)cur_block, FIRST_CHILD(this),
74b1de
-                          FIRST_CHILD(this)->fops->unlink, &loc, 0, NULL);
74b1de
+                          FIRST_CHILD(this)->fops->unlink, &loc, 0, xdata_req);
74b1de
         loc_wipe(&loc;;
74b1de
     next:
74b1de
         i++;
74b1de
@@ -2156,6 +2187,7 @@ shard_truncate_htol(call_frame_t *frame, xlator_t *this, inode_t *inode)
74b1de
         if (!--call_count)
74b1de
             break;
74b1de
     }
74b1de
+    dict_unref(xdata_req);
74b1de
     return 0;
74b1de
 }
74b1de
 
74b1de
@@ -2608,7 +2640,7 @@ shard_post_lookup_truncate_handler(call_frame_t *frame, xlator_t *this)
74b1de
          */
74b1de
         local->hole_size = local->offset - local->prebuf.ia_size;
74b1de
         local->delta_size = 0;
74b1de
-        local->delta_blocks = 0;
74b1de
+        GF_ATOMIC_INIT(local->delta_blocks, 0);
74b1de
         local->postbuf.ia_size = local->offset;
74b1de
         tmp_stbuf.ia_size = local->offset;
74b1de
         shard_inode_ctx_set(local->loc.inode, this, &tmp_stbuf, 0,
74b1de
@@ -2624,7 +2656,7 @@ shard_post_lookup_truncate_handler(call_frame_t *frame, xlator_t *this)
74b1de
          */
74b1de
         local->hole_size = 0;
74b1de
         local->delta_size = (local->offset - local->prebuf.ia_size);
74b1de
-        local->delta_blocks = 0;
74b1de
+        GF_ATOMIC_INIT(local->delta_blocks, 0);
74b1de
         tmp_stbuf.ia_size = local->offset;
74b1de
         shard_inode_ctx_set(local->loc.inode, this, &tmp_stbuf, 0,
74b1de
                             SHARD_INODE_WRITE_MASK);
74b1de
@@ -2680,6 +2712,7 @@ shard_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
74b1de
     if (!local->xattr_req)
74b1de
         goto err;
74b1de
     local->resolver_base_inode = loc->inode;
74b1de
+    GF_ATOMIC_INIT(local->delta_blocks, 0);
74b1de
 
74b1de
     shard_lookup_base_file(frame, this, &local->loc,
74b1de
                            shard_post_lookup_truncate_handler);
74b1de
@@ -2735,6 +2768,7 @@ shard_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
74b1de
     local->loc.inode = inode_ref(fd->inode);
74b1de
     gf_uuid_copy(local->loc.gfid, fd->inode->gfid);
74b1de
     local->resolver_base_inode = fd->inode;
74b1de
+    GF_ATOMIC_INIT(local->delta_blocks, 0);
74b1de
 
74b1de
     shard_lookup_base_file(frame, this, &local->loc,
74b1de
                            shard_post_lookup_truncate_handler);
74b1de
@@ -5295,7 +5329,8 @@ shard_common_inode_write_do_cbk(call_frame_t *frame, void *cookie,
74b1de
             local->op_errno = op_errno;
74b1de
         } else {
74b1de
             local->written_size += op_ret;
74b1de
-            local->delta_blocks += (post->ia_blocks - pre->ia_blocks);
74b1de
+            GF_ATOMIC_ADD(local->delta_blocks,
74b1de
+                          post->ia_blocks - pre->ia_blocks);
74b1de
             local->delta_size += (post->ia_size - pre->ia_size);
74b1de
             shard_inode_ctx_set(local->fd->inode, this, post, 0,
74b1de
                                 SHARD_MASK_TIMES);
74b1de
@@ -6599,6 +6634,7 @@ shard_common_inode_write_begin(call_frame_t *frame, xlator_t *this,
74b1de
     local->fd = fd_ref(fd);
74b1de
     local->block_size = block_size;
74b1de
     local->resolver_base_inode = local->fd->inode;
74b1de
+    GF_ATOMIC_INIT(local->delta_blocks, 0);
74b1de
 
74b1de
     local->loc.inode = inode_ref(fd->inode);
74b1de
     gf_uuid_copy(local->loc.gfid, fd->inode->gfid);
74b1de
diff --git a/xlators/features/shard/src/shard.h b/xlators/features/shard/src/shard.h
74b1de
index cd6a663..04abd62 100644
74b1de
--- a/xlators/features/shard/src/shard.h
74b1de
+++ b/xlators/features/shard/src/shard.h
74b1de
@@ -275,7 +275,7 @@ typedef struct shard_local {
74b1de
     size_t req_size;
74b1de
     size_t readdir_size;
74b1de
     int64_t delta_size;
74b1de
-    int64_t delta_blocks;
74b1de
+    gf_atomic_t delta_blocks;
74b1de
     loc_t loc;
74b1de
     loc_t dot_shard_loc;
74b1de
     loc_t dot_shard_rm_loc;
74b1de
diff --git a/xlators/storage/posix/src/posix-entry-ops.c b/xlators/storage/posix/src/posix-entry-ops.c
74b1de
index b24a052..34ee2b8 100644
74b1de
--- a/xlators/storage/posix/src/posix-entry-ops.c
74b1de
+++ b/xlators/storage/posix/src/posix-entry-ops.c
74b1de
@@ -1071,6 +1071,7 @@ posix_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
74b1de
     char *real_path = NULL;
74b1de
     char *par_path = NULL;
74b1de
     int32_t fd = -1;
74b1de
+    int ret = -1;
74b1de
     struct iatt stbuf = {
74b1de
         0,
74b1de
     };
74b1de
@@ -1235,6 +1236,14 @@ posix_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
74b1de
         goto out;
74b1de
     }
74b1de
 
74b1de
+    if (xdata && dict_get(xdata, GF_GET_FILE_BLOCK_COUNT)) {
74b1de
+        ret = dict_set_uint64(unwind_dict, GF_GET_FILE_BLOCK_COUNT,
74b1de
+                              stbuf.ia_blocks);
74b1de
+        if (ret)
74b1de
+            gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_SET_XDATA_FAIL,
74b1de
+                   "Failed to set %s in rsp dict", GF_GET_FILE_BLOCK_COUNT);
74b1de
+    }
74b1de
+
74b1de
     if (xdata && dict_get(xdata, GET_LINK_COUNT))
74b1de
         get_link_count = _gf_true;
74b1de
     op_ret = posix_unlink_gfid_handle_and_entry(frame, this, real_path, &stbuf,
74b1de
-- 
74b1de
1.8.3.1
74b1de