887953
From f4d1a1683882a4da81240413dae1f6a390ee2442 Mon Sep 17 00:00:00 2001
887953
From: Krutika Dhananjay <kdhananj@redhat.com>
887953
Date: Thu, 24 Jan 2019 14:14:39 +0530
887953
Subject: [PATCH 510/510] features/shard: Ref shard inode while adding to fsync
887953
 list
887953
887953
> Upstream: https://review.gluster.org/22091
887953
> BUG: 1669077
887953
> Change-Id: Iab460667d091b8388322f59b6cb27ce69299b1b2
887953
887953
PROBLEM:
887953
887953
Lot of the earlier changes in the management of shards in lru, fsync
887953
lists assumed that if a given shard exists in fsync list, it must be
887953
part of lru list as well. This was found to be not true.
887953
887953
Consider this - a file is FALLOCATE'd to a size which would make the
887953
number of participant shards to be greater than the lru list size.
887953
In this case, some of the resolved shards that are to participate in
887953
this fop will be evicted from lru list to give way to the rest of the
887953
shards. And once FALLOCATE completes, these shards are added to fsync
887953
list but without a ref. After the fop completes, these shard inodes
887953
are unref'd and destroyed while their inode ctxs are still part of
887953
fsync list. Now when an FSYNC is called on the base file and the
887953
fsync-list traversed, the client crashes due to illegal memory access.
887953
887953
FIX:
887953
887953
Hold a ref on the shard inode when adding to fsync list as well.
887953
And unref under following conditions:
887953
1. when the shard is evicted from lru list
887953
2. when the base file is fsync'd
887953
3. when the shards are deleted.
887953
887953
Change-Id: Iab460667d091b8388322f59b6cb27ce69299b1b2
887953
BUG: 1668304
887953
Signed-off-by: Krutika Dhananjay <kdhananj@redhat.com>
887953
Reviewed-on: https://code.engineering.redhat.com/gerrit/161397
887953
Tested-by: RHGS Build Bot <nigelb@redhat.com>
887953
Reviewed-by: Xavi Hernandez <xhernandez@redhat.com>
887953
---
887953
 tests/bugs/shard/bug-1669077.t     | 29 +++++++++++++++++++++++++++++
887953
 xlators/features/shard/src/shard.c | 29 +++++++++++++++++++++--------
887953
 2 files changed, 50 insertions(+), 8 deletions(-)
887953
 create mode 100644 tests/bugs/shard/bug-1669077.t
887953
887953
diff --git a/tests/bugs/shard/bug-1669077.t b/tests/bugs/shard/bug-1669077.t
887953
new file mode 100644
887953
index 000000000..8d3a67a36
887953
--- /dev/null
887953
+++ b/tests/bugs/shard/bug-1669077.t
887953
@@ -0,0 +1,29 @@
887953
+#!/bin/bash
887953
+
887953
+. $(dirname $0)/../../include.rc
887953
+. $(dirname $0)/../../volume.rc
887953
+
887953
+SHARD_COUNT_TIME=5
887953
+
887953
+cleanup
887953
+
887953
+TEST glusterd
887953
+TEST pidof glusterd
887953
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}
887953
+TEST $CLI volume set $V0 features.shard on
887953
+TEST $CLI volume set $V0 features.shard-block-size 4MB
887953
+TEST $CLI volume set $V0 features.shard-lru-limit 25
887953
+
887953
+TEST $CLI volume start $V0
887953
+
887953
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0
887953
+
887953
+# If the bug still exists, client should crash during fallocate below
887953
+TEST fallocate -l 200M $M0/foo
887953
+
887953
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
887953
+
887953
+TEST $CLI volume stop $V0
887953
+TEST $CLI volume delete $V0
887953
+
887953
+cleanup
887953
diff --git a/xlators/features/shard/src/shard.c b/xlators/features/shard/src/shard.c
887953
index 19dd3e4ba..cd388500d 100644
887953
--- a/xlators/features/shard/src/shard.c
887953
+++ b/xlators/features/shard/src/shard.c
887953
@@ -272,6 +272,7 @@ shard_inode_ctx_add_to_fsync_list (inode_t *base_inode, xlator_t *this,
887953
          * of the to_fsync_list.
887953
          */
887953
         inode_ref (base_inode);
887953
+        inode_ref(shard_inode);
887953
 
887953
         LOCK (&base_inode->lock);
887953
         LOCK (&shard_inode->lock);
887953
@@ -285,8 +286,10 @@ shard_inode_ctx_add_to_fsync_list (inode_t *base_inode, xlator_t *this,
887953
         /* Unref the base inode corresponding to the ref above, if the shard is
887953
          * found to be already part of the fsync list.
887953
          */
887953
-        if (ret != 0)
887953
+        if (ret != 0) {
887953
                 inode_unref (base_inode);
887953
+                inode_unref(shard_inode);
887953
+        }
887953
         return ret;
887953
 }
887953
 
887953
@@ -735,6 +738,10 @@ after_fsync_check:
887953
                                               block_bname);
887953
                                 inode_forget (lru_inode, 0);
887953
                         } else {
887953
+                                /* The following unref corresponds to the ref
887953
+                                 * held when the shard was added to fsync list.
887953
+                                 */
887953
+                                inode_unref(lru_inode);
887953
                                 fsync_inode = lru_inode;
887953
                                 if (lru_base_inode)
887953
                                         inode_unref (lru_base_inode);
887953
@@ -2947,7 +2954,7 @@ shard_unlink_block_inode (shard_local_t *local, int shard_block_num)
887953
         shard_priv_t         *priv              = NULL;
887953
         shard_inode_ctx_t    *ctx               = NULL;
887953
         shard_inode_ctx_t    *base_ictx         = NULL;
887953
-        gf_boolean_t          unlink_unref_forget = _gf_false;
887953
+        int                   unref_shard_inode = 0;
887953
 
887953
         this = THIS;
887953
         priv = this->private;
887953
@@ -2973,11 +2980,12 @@ shard_unlink_block_inode (shard_local_t *local, int shard_block_num)
887953
                         list_del_init (&ctx->ilist);
887953
                         priv->inode_count--;
887953
                         unref_base_inode++;
887953
+                        unref_shard_inode++;
887953
                         GF_ASSERT (priv->inode_count >= 0);
887953
-                        unlink_unref_forget = _gf_true;
887953
                 }
887953
                 if (ctx->fsync_needed) {
887953
                         unref_base_inode++;
887953
+                        unref_shard_inode++;
887953
                         list_del_init (&ctx->to_fsync_list);
887953
                         if (base_inode) {
887953
                                 __shard_inode_ctx_get (base_inode, this, &base_ictx);
887953
@@ -2988,11 +2996,11 @@ shard_unlink_block_inode (shard_local_t *local, int shard_block_num)
887953
         UNLOCK(&inode->lock);
887953
         if (base_inode)
887953
                 UNLOCK(&base_inode->lock);
887953
-        if (unlink_unref_forget) {
887953
-                inode_unlink (inode, priv->dot_shard_inode, block_bname);
887953
-                inode_unref (inode);
887953
-                inode_forget (inode, 0);
887953
-        }
887953
+
887953
+        inode_unlink(inode, priv->dot_shard_inode, block_bname);
887953
+        inode_ref_reduce_by_n(inode, unref_shard_inode);
887953
+        inode_forget(inode, 0);
887953
+
887953
         if (base_inode && unref_base_inode)
887953
                 inode_ref_reduce_by_n (base_inode, unref_base_inode);
887953
         UNLOCK(&priv->lock);
887953
@@ -5824,6 +5832,7 @@ shard_fsync_shards_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
887953
         shard_inode_ctx_t     *ctx         = NULL;
887953
         shard_inode_ctx_t     *base_ictx   = NULL;
887953
         inode_t               *base_inode  = NULL;
887953
+        gf_boolean_t           unref_shard_inode = _gf_false;
887953
 
887953
         local = frame->local;
887953
         base_inode = local->fd->inode;
887953
@@ -5858,11 +5867,15 @@ out:
887953
                                 list_add_tail (&ctx->to_fsync_list,
887953
                                                &base_ictx->to_fsync_list);
887953
                                 base_ictx->fsync_count++;
887953
+                        } else {
887953
+                            unref_shard_inode = _gf_true;
887953
                         }
887953
                 }
887953
                 UNLOCK (&anon_fd->inode->lock);
887953
                 UNLOCK (&base_inode->lock);
887953
         }
887953
+        if (unref_shard_inode)
887953
+                inode_unref(anon_fd->inode);
887953
         if (anon_fd)
887953
                 fd_unref (anon_fd);
887953
 
887953
-- 
887953
2.20.1
887953