17b94a
From 369c5772a722b6e346ec8b41f992112785366778 Mon Sep 17 00:00:00 2001
17b94a
From: Krutika Dhananjay <kdhananj@redhat.com>
17b94a
Date: Wed, 8 May 2019 13:00:51 +0530
17b94a
Subject: [PATCH 189/192] features/shard: Fix block-count accounting upon
17b94a
 truncate to lower size
17b94a
17b94a
    > Upstream: https://review.gluster.org/22681
17b94a
    > BUG: 1705884
17b94a
    > Change-Id: I9128a192e9bf8c3c3a959e96b7400879d03d7c53
17b94a
17b94a
The way delta_blocks is computed in shard is incorrect, when a file
17b94a
is truncated to a lower size. The accounting only considers change
17b94a
in size of the last of the truncated shards.
17b94a
17b94a
FIX:
17b94a
17b94a
Get the block-count of each shard just before an unlink at posix in
17b94a
xdata.  Their summation plus the change in size of last shard
17b94a
(from an actual truncate) is used to compute delta_blocks which is
17b94a
used in the xattrop for size update.
17b94a
17b94a
Change-Id: I9128a192e9bf8c3c3a959e96b7400879d03d7c53
17b94a
updates: bz#1668001
17b94a
Signed-off-by: Krutika Dhananjay <kdhananj@redhat.com>
17b94a
Reviewed-on: https://code.engineering.redhat.com/gerrit/173477
17b94a
Tested-by: RHGS Build Bot <nigelb@redhat.com>
17b94a
Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
17b94a
---
17b94a
 libglusterfs/src/glusterfs/glusterfs.h      |  2 +
17b94a
 tests/bugs/shard/bug-1705884.t              | 32 +++++++++++++++
17b94a
 xlators/features/shard/src/shard.c          | 60 +++++++++++++++++++++++------
17b94a
 xlators/features/shard/src/shard.h          |  2 +-
17b94a
 xlators/storage/posix/src/posix-entry-ops.c |  9 +++++
17b94a
 5 files changed, 92 insertions(+), 13 deletions(-)
17b94a
 create mode 100644 tests/bugs/shard/bug-1705884.t
17b94a
17b94a
diff --git a/libglusterfs/src/glusterfs/glusterfs.h b/libglusterfs/src/glusterfs/glusterfs.h
17b94a
index 516b497..9ec2365 100644
17b94a
--- a/libglusterfs/src/glusterfs/glusterfs.h
17b94a
+++ b/libglusterfs/src/glusterfs/glusterfs.h
17b94a
@@ -328,6 +328,8 @@ enum gf_internal_fop_indicator {
17b94a
 #define GF_RESPONSE_LINK_COUNT_XDATA "gf_response_link_count"
17b94a
 #define GF_REQUEST_LINK_COUNT_XDATA "gf_request_link_count"
17b94a
 
17b94a
+#define GF_GET_FILE_BLOCK_COUNT "gf_get_file_block_count"
17b94a
+
17b94a
 #define CTR_ATTACH_TIER_LOOKUP "ctr_attach_tier_lookup"
17b94a
 
17b94a
 #define CLIENT_CMD_CONNECT "trusted.glusterfs.client-connect"
17b94a
diff --git a/tests/bugs/shard/bug-1705884.t b/tests/bugs/shard/bug-1705884.t
17b94a
new file mode 100644
17b94a
index 0000000..f6e5037
17b94a
--- /dev/null
17b94a
+++ b/tests/bugs/shard/bug-1705884.t
17b94a
@@ -0,0 +1,32 @@
17b94a
+#!/bin/bash
17b94a
+
17b94a
+. $(dirname $0)/../../include.rc
17b94a
+. $(dirname $0)/../../volume.rc
17b94a
+. $(dirname $0)/../../fallocate.rc
17b94a
+
17b94a
+cleanup
17b94a
+
17b94a
+require_fallocate -l 1m $M0/file
17b94a
+
17b94a
+TEST glusterd
17b94a
+TEST pidof glusterd
17b94a
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}
17b94a
+TEST $CLI volume set $V0 features.shard on
17b94a
+TEST $CLI volume set $V0 performance.write-behind off
17b94a
+TEST $CLI volume set $V0 performance.stat-prefetch off
17b94a
+TEST $CLI volume start $V0
17b94a
+
17b94a
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0
17b94a
+
17b94a
+TEST fallocate -l 200M $M0/foo
17b94a
+EXPECT `echo "$(( ( 200 * 1024 * 1024 ) / 512 ))"`  stat -c %b $M0/foo
17b94a
+TEST truncate -s 0 $M0/foo
17b94a
+EXPECT "0" stat -c %b $M0/foo
17b94a
+TEST fallocate -l 100M $M0/foo
17b94a
+EXPECT `echo "$(( ( 100 * 1024 * 1024 ) / 512 ))"`  stat -c %b $M0/foo
17b94a
+
17b94a
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
17b94a
+TEST $CLI volume stop $V0
17b94a
+TEST $CLI volume delete $V0
17b94a
+
17b94a
+cleanup
17b94a
diff --git a/xlators/features/shard/src/shard.c b/xlators/features/shard/src/shard.c
17b94a
index c1799ad..b248767 100644
17b94a
--- a/xlators/features/shard/src/shard.c
17b94a
+++ b/xlators/features/shard/src/shard.c
17b94a
@@ -1135,6 +1135,7 @@ shard_update_file_size(call_frame_t *frame, xlator_t *this, fd_t *fd,
17b94a
 {
17b94a
     int ret = -1;
17b94a
     int64_t *size_attr = NULL;
17b94a
+    int64_t delta_blocks = 0;
17b94a
     inode_t *inode = NULL;
17b94a
     shard_local_t *local = NULL;
17b94a
     dict_t *xattr_req = NULL;
17b94a
@@ -1156,13 +1157,13 @@ shard_update_file_size(call_frame_t *frame, xlator_t *this, fd_t *fd,
17b94a
 
17b94a
     /* If both size and block count have not changed, then skip the xattrop.
17b94a
      */
17b94a
-    if ((local->delta_size + local->hole_size == 0) &&
17b94a
-        (local->delta_blocks == 0)) {
17b94a
+    delta_blocks = GF_ATOMIC_GET(local->delta_blocks);
17b94a
+    if ((local->delta_size + local->hole_size == 0) && (delta_blocks == 0)) {
17b94a
         goto out;
17b94a
     }
17b94a
 
17b94a
     ret = shard_set_size_attrs(local->delta_size + local->hole_size,
17b94a
-                               local->delta_blocks, &size_attr);
17b94a
+                               delta_blocks, &size_attr);
17b94a
     if (ret) {
17b94a
         gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_SIZE_SET_FAILED,
17b94a
                "Failed to set size attrs for %s", uuid_utoa(inode->gfid));
17b94a
@@ -1947,6 +1948,7 @@ shard_truncate_last_shard_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
17b94a
                               dict_t *xdata)
17b94a
 {
17b94a
     inode_t *inode = NULL;
17b94a
+    int64_t delta_blocks = 0;
17b94a
     shard_local_t *local = NULL;
17b94a
 
17b94a
     local = frame->local;
17b94a
@@ -1967,14 +1969,15 @@ shard_truncate_last_shard_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
17b94a
     }
17b94a
 
17b94a
     local->postbuf.ia_size = local->offset;
17b94a
-    local->postbuf.ia_blocks -= (prebuf->ia_blocks - postbuf->ia_blocks);
17b94a
     /* Let the delta be negative. We want xattrop to do subtraction */
17b94a
     local->delta_size = local->postbuf.ia_size - local->prebuf.ia_size;
17b94a
-    local->delta_blocks = postbuf->ia_blocks - prebuf->ia_blocks;
17b94a
+    delta_blocks = GF_ATOMIC_ADD(local->delta_blocks,
17b94a
+                                 postbuf->ia_blocks - prebuf->ia_blocks);
17b94a
+    GF_ASSERT(delta_blocks <= 0);
17b94a
+    local->postbuf.ia_blocks += delta_blocks;
17b94a
     local->hole_size = 0;
17b94a
 
17b94a
-    shard_inode_ctx_set(inode, this, postbuf, 0, SHARD_MASK_TIMES);
17b94a
-
17b94a
+    shard_inode_ctx_set(inode, this, &local->postbuf, 0, SHARD_MASK_TIMES);
17b94a
     shard_update_file_size(frame, this, NULL, &local->loc,
17b94a
                            shard_post_update_size_truncate_handler);
17b94a
     return 0;
17b94a
@@ -2034,8 +2037,10 @@ shard_truncate_htol_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
17b94a
                         struct iatt *preparent, struct iatt *postparent,
17b94a
                         dict_t *xdata)
17b94a
 {
17b94a
+    int ret = 0;
17b94a
     int call_count = 0;
17b94a
     int shard_block_num = (long)cookie;
17b94a
+    uint64_t block_count = 0;
17b94a
     shard_local_t *local = NULL;
17b94a
 
17b94a
     local = frame->local;
17b94a
@@ -2045,6 +2050,16 @@ shard_truncate_htol_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
17b94a
         local->op_errno = op_errno;
17b94a
         goto done;
17b94a
     }
17b94a
+    ret = dict_get_uint64(xdata, GF_GET_FILE_BLOCK_COUNT, &block_count);
17b94a
+    if (!ret) {
17b94a
+        GF_ATOMIC_SUB(local->delta_blocks, block_count);
17b94a
+    } else {
17b94a
+        /* dict_get failed possibly due to a heterogeneous cluster? */
17b94a
+        gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED,
17b94a
+               "Failed to get key %s from dict during truncate of gfid %s",
17b94a
+               GF_GET_FILE_BLOCK_COUNT,
17b94a
+               uuid_utoa(local->resolver_base_inode->gfid));
17b94a
+    }
17b94a
 
17b94a
     shard_unlink_block_inode(local, shard_block_num);
17b94a
 done:
17b94a
@@ -2074,6 +2089,7 @@ shard_truncate_htol(call_frame_t *frame, xlator_t *this, inode_t *inode)
17b94a
     gf_boolean_t wind_failed = _gf_false;
17b94a
     shard_local_t *local = NULL;
17b94a
     shard_priv_t *priv = NULL;
17b94a
+    dict_t *xdata_req = NULL;
17b94a
 
17b94a
     local = frame->local;
17b94a
     priv = this->private;
17b94a
@@ -2101,7 +2117,7 @@ shard_truncate_htol(call_frame_t *frame, xlator_t *this, inode_t *inode)
17b94a
         local->postbuf.ia_size = local->offset;
17b94a
         local->postbuf.ia_blocks = local->prebuf.ia_blocks;
17b94a
         local->delta_size = local->postbuf.ia_size - local->prebuf.ia_size;
17b94a
-        local->delta_blocks = 0;
17b94a
+        GF_ATOMIC_INIT(local->delta_blocks, 0);
17b94a
         local->hole_size = 0;
17b94a
         shard_update_file_size(frame, this, local->fd, &local->loc,
17b94a
                                shard_post_update_size_truncate_handler);
17b94a
@@ -2110,6 +2126,21 @@ shard_truncate_htol(call_frame_t *frame, xlator_t *this, inode_t *inode)
17b94a
 
17b94a
     local->call_count = call_count;
17b94a
     i = 1;
17b94a
+    xdata_req = dict_new();
17b94a
+    if (!xdata_req) {
17b94a
+        shard_common_failure_unwind(local->fop, frame, -1, ENOMEM);
17b94a
+        return 0;
17b94a
+    }
17b94a
+    ret = dict_set_uint64(xdata_req, GF_GET_FILE_BLOCK_COUNT, 8 * 8);
17b94a
+    if (ret) {
17b94a
+        gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED,
17b94a
+               "Failed to set key %s into dict during truncate of %s",
17b94a
+               GF_GET_FILE_BLOCK_COUNT,
17b94a
+               uuid_utoa(local->resolver_base_inode->gfid));
17b94a
+        dict_unref(xdata_req);
17b94a
+        shard_common_failure_unwind(local->fop, frame, -1, ENOMEM);
17b94a
+        return 0;
17b94a
+    }
17b94a
 
17b94a
     SHARD_SET_ROOT_FS_ID(frame, local);
17b94a
     while (cur_block <= last_block) {
17b94a
@@ -2148,7 +2179,7 @@ shard_truncate_htol(call_frame_t *frame, xlator_t *this, inode_t *inode)
17b94a
 
17b94a
         STACK_WIND_COOKIE(frame, shard_truncate_htol_cbk,
17b94a
                           (void *)(long)cur_block, FIRST_CHILD(this),
17b94a
-                          FIRST_CHILD(this)->fops->unlink, &loc, 0, NULL);
17b94a
+                          FIRST_CHILD(this)->fops->unlink, &loc, 0, xdata_req);
17b94a
         loc_wipe(&loc;;
17b94a
     next:
17b94a
         i++;
17b94a
@@ -2156,6 +2187,7 @@ shard_truncate_htol(call_frame_t *frame, xlator_t *this, inode_t *inode)
17b94a
         if (!--call_count)
17b94a
             break;
17b94a
     }
17b94a
+    dict_unref(xdata_req);
17b94a
     return 0;
17b94a
 }
17b94a
 
17b94a
@@ -2608,7 +2640,7 @@ shard_post_lookup_truncate_handler(call_frame_t *frame, xlator_t *this)
17b94a
          */
17b94a
         local->hole_size = local->offset - local->prebuf.ia_size;
17b94a
         local->delta_size = 0;
17b94a
-        local->delta_blocks = 0;
17b94a
+        GF_ATOMIC_INIT(local->delta_blocks, 0);
17b94a
         local->postbuf.ia_size = local->offset;
17b94a
         tmp_stbuf.ia_size = local->offset;
17b94a
         shard_inode_ctx_set(local->loc.inode, this, &tmp_stbuf, 0,
17b94a
@@ -2624,7 +2656,7 @@ shard_post_lookup_truncate_handler(call_frame_t *frame, xlator_t *this)
17b94a
          */
17b94a
         local->hole_size = 0;
17b94a
         local->delta_size = (local->offset - local->prebuf.ia_size);
17b94a
-        local->delta_blocks = 0;
17b94a
+        GF_ATOMIC_INIT(local->delta_blocks, 0);
17b94a
         tmp_stbuf.ia_size = local->offset;
17b94a
         shard_inode_ctx_set(local->loc.inode, this, &tmp_stbuf, 0,
17b94a
                             SHARD_INODE_WRITE_MASK);
17b94a
@@ -2680,6 +2712,7 @@ shard_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
17b94a
     if (!local->xattr_req)
17b94a
         goto err;
17b94a
     local->resolver_base_inode = loc->inode;
17b94a
+    GF_ATOMIC_INIT(local->delta_blocks, 0);
17b94a
 
17b94a
     shard_lookup_base_file(frame, this, &local->loc,
17b94a
                            shard_post_lookup_truncate_handler);
17b94a
@@ -2735,6 +2768,7 @@ shard_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
17b94a
     local->loc.inode = inode_ref(fd->inode);
17b94a
     gf_uuid_copy(local->loc.gfid, fd->inode->gfid);
17b94a
     local->resolver_base_inode = fd->inode;
17b94a
+    GF_ATOMIC_INIT(local->delta_blocks, 0);
17b94a
 
17b94a
     shard_lookup_base_file(frame, this, &local->loc,
17b94a
                            shard_post_lookup_truncate_handler);
17b94a
@@ -5295,7 +5329,8 @@ shard_common_inode_write_do_cbk(call_frame_t *frame, void *cookie,
17b94a
             local->op_errno = op_errno;
17b94a
         } else {
17b94a
             local->written_size += op_ret;
17b94a
-            local->delta_blocks += (post->ia_blocks - pre->ia_blocks);
17b94a
+            GF_ATOMIC_ADD(local->delta_blocks,
17b94a
+                          post->ia_blocks - pre->ia_blocks);
17b94a
             local->delta_size += (post->ia_size - pre->ia_size);
17b94a
             shard_inode_ctx_set(local->fd->inode, this, post, 0,
17b94a
                                 SHARD_MASK_TIMES);
17b94a
@@ -6599,6 +6634,7 @@ shard_common_inode_write_begin(call_frame_t *frame, xlator_t *this,
17b94a
     local->fd = fd_ref(fd);
17b94a
     local->block_size = block_size;
17b94a
     local->resolver_base_inode = local->fd->inode;
17b94a
+    GF_ATOMIC_INIT(local->delta_blocks, 0);
17b94a
 
17b94a
     local->loc.inode = inode_ref(fd->inode);
17b94a
     gf_uuid_copy(local->loc.gfid, fd->inode->gfid);
17b94a
diff --git a/xlators/features/shard/src/shard.h b/xlators/features/shard/src/shard.h
17b94a
index cd6a663..04abd62 100644
17b94a
--- a/xlators/features/shard/src/shard.h
17b94a
+++ b/xlators/features/shard/src/shard.h
17b94a
@@ -275,7 +275,7 @@ typedef struct shard_local {
17b94a
     size_t req_size;
17b94a
     size_t readdir_size;
17b94a
     int64_t delta_size;
17b94a
-    int64_t delta_blocks;
17b94a
+    gf_atomic_t delta_blocks;
17b94a
     loc_t loc;
17b94a
     loc_t dot_shard_loc;
17b94a
     loc_t dot_shard_rm_loc;
17b94a
diff --git a/xlators/storage/posix/src/posix-entry-ops.c b/xlators/storage/posix/src/posix-entry-ops.c
17b94a
index b24a052..34ee2b8 100644
17b94a
--- a/xlators/storage/posix/src/posix-entry-ops.c
17b94a
+++ b/xlators/storage/posix/src/posix-entry-ops.c
17b94a
@@ -1071,6 +1071,7 @@ posix_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
17b94a
     char *real_path = NULL;
17b94a
     char *par_path = NULL;
17b94a
     int32_t fd = -1;
17b94a
+    int ret = -1;
17b94a
     struct iatt stbuf = {
17b94a
         0,
17b94a
     };
17b94a
@@ -1235,6 +1236,14 @@ posix_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
17b94a
         goto out;
17b94a
     }
17b94a
 
17b94a
+    if (xdata && dict_get(xdata, GF_GET_FILE_BLOCK_COUNT)) {
17b94a
+        ret = dict_set_uint64(unwind_dict, GF_GET_FILE_BLOCK_COUNT,
17b94a
+                              stbuf.ia_blocks);
17b94a
+        if (ret)
17b94a
+            gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_SET_XDATA_FAIL,
17b94a
+                   "Failed to set %s in rsp dict", GF_GET_FILE_BLOCK_COUNT);
17b94a
+    }
17b94a
+
17b94a
     if (xdata && dict_get(xdata, GET_LINK_COUNT))
17b94a
         get_link_count = _gf_true;
17b94a
     op_ret = posix_unlink_gfid_handle_and_entry(frame, this, real_path, &stbuf,
17b94a
-- 
17b94a
1.8.3.1
17b94a