e7a346
From f4d1a1683882a4da81240413dae1f6a390ee2442 Mon Sep 17 00:00:00 2001
e7a346
From: Krutika Dhananjay <kdhananj@redhat.com>
e7a346
Date: Thu, 24 Jan 2019 14:14:39 +0530
e7a346
Subject: [PATCH 510/510] features/shard: Ref shard inode while adding to fsync
e7a346
 list
e7a346
e7a346
> Upstream: https://review.gluster.org/22091
e7a346
> BUG: 1669077
e7a346
> Change-Id: Iab460667d091b8388322f59b6cb27ce69299b1b2
e7a346
e7a346
PROBLEM:
e7a346
e7a346
Lot of the earlier changes in the management of shards in lru, fsync
e7a346
lists assumed that if a given shard exists in fsync list, it must be
e7a346
part of lru list as well. This was found to be not true.
e7a346
e7a346
Consider this - a file is FALLOCATE'd to a size which would make the
e7a346
number of participant shards to be greater than the lru list size.
e7a346
In this case, some of the resolved shards that are to participate in
e7a346
this fop will be evicted from lru list to give way to the rest of the
e7a346
shards. And once FALLOCATE completes, these shards are added to fsync
e7a346
list but without a ref. After the fop completes, these shard inodes
e7a346
are unref'd and destroyed while their inode ctxs are still part of
e7a346
fsync list. Now when an FSYNC is called on the base file and the
e7a346
fsync-list traversed, the client crashes due to illegal memory access.
e7a346
e7a346
FIX:
e7a346
e7a346
Hold a ref on the shard inode when adding to fsync list as well.
e7a346
And unref under following conditions:
e7a346
1. when the shard is evicted from lru list
e7a346
2. when the base file is fsync'd
e7a346
3. when the shards are deleted.
e7a346
e7a346
Change-Id: Iab460667d091b8388322f59b6cb27ce69299b1b2
e7a346
BUG: 1668304
e7a346
Signed-off-by: Krutika Dhananjay <kdhananj@redhat.com>
e7a346
Reviewed-on: https://code.engineering.redhat.com/gerrit/161397
e7a346
Tested-by: RHGS Build Bot <nigelb@redhat.com>
e7a346
Reviewed-by: Xavi Hernandez <xhernandez@redhat.com>
e7a346
---
e7a346
 tests/bugs/shard/bug-1669077.t     | 29 +++++++++++++++++++++++++++++
e7a346
 xlators/features/shard/src/shard.c | 29 +++++++++++++++++++++--------
e7a346
 2 files changed, 50 insertions(+), 8 deletions(-)
e7a346
 create mode 100644 tests/bugs/shard/bug-1669077.t
e7a346
e7a346
diff --git a/tests/bugs/shard/bug-1669077.t b/tests/bugs/shard/bug-1669077.t
e7a346
new file mode 100644
e7a346
index 000000000..8d3a67a36
e7a346
--- /dev/null
e7a346
+++ b/tests/bugs/shard/bug-1669077.t
e7a346
@@ -0,0 +1,29 @@
e7a346
+#!/bin/bash
e7a346
+
e7a346
+. $(dirname $0)/../../include.rc
e7a346
+. $(dirname $0)/../../volume.rc
e7a346
+
e7a346
+SHARD_COUNT_TIME=5
e7a346
+
e7a346
+cleanup
e7a346
+
e7a346
+TEST glusterd
e7a346
+TEST pidof glusterd
e7a346
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}
e7a346
+TEST $CLI volume set $V0 features.shard on
e7a346
+TEST $CLI volume set $V0 features.shard-block-size 4MB
e7a346
+TEST $CLI volume set $V0 features.shard-lru-limit 25
e7a346
+
e7a346
+TEST $CLI volume start $V0
e7a346
+
e7a346
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0
e7a346
+
e7a346
+# If the bug still exists, client should crash during fallocate below
e7a346
+TEST fallocate -l 200M $M0/foo
e7a346
+
e7a346
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
e7a346
+
e7a346
+TEST $CLI volume stop $V0
e7a346
+TEST $CLI volume delete $V0
e7a346
+
e7a346
+cleanup
e7a346
diff --git a/xlators/features/shard/src/shard.c b/xlators/features/shard/src/shard.c
e7a346
index 19dd3e4ba..cd388500d 100644
e7a346
--- a/xlators/features/shard/src/shard.c
e7a346
+++ b/xlators/features/shard/src/shard.c
e7a346
@@ -272,6 +272,7 @@ shard_inode_ctx_add_to_fsync_list (inode_t *base_inode, xlator_t *this,
e7a346
          * of the to_fsync_list.
e7a346
          */
e7a346
         inode_ref (base_inode);
e7a346
+        inode_ref(shard_inode);
e7a346
 
e7a346
         LOCK (&base_inode->lock);
e7a346
         LOCK (&shard_inode->lock);
e7a346
@@ -285,8 +286,10 @@ shard_inode_ctx_add_to_fsync_list (inode_t *base_inode, xlator_t *this,
e7a346
         /* Unref the base inode corresponding to the ref above, if the shard is
e7a346
          * found to be already part of the fsync list.
e7a346
          */
e7a346
-        if (ret != 0)
e7a346
+        if (ret != 0) {
e7a346
                 inode_unref (base_inode);
e7a346
+                inode_unref(shard_inode);
e7a346
+        }
e7a346
         return ret;
e7a346
 }
e7a346
 
e7a346
@@ -735,6 +738,10 @@ after_fsync_check:
e7a346
                                               block_bname);
e7a346
                                 inode_forget (lru_inode, 0);
e7a346
                         } else {
e7a346
+                                /* The following unref corresponds to the ref
e7a346
+                                 * held when the shard was added to fsync list.
e7a346
+                                 */
e7a346
+                                inode_unref(lru_inode);
e7a346
                                 fsync_inode = lru_inode;
e7a346
                                 if (lru_base_inode)
e7a346
                                         inode_unref (lru_base_inode);
e7a346
@@ -2947,7 +2954,7 @@ shard_unlink_block_inode (shard_local_t *local, int shard_block_num)
e7a346
         shard_priv_t         *priv              = NULL;
e7a346
         shard_inode_ctx_t    *ctx               = NULL;
e7a346
         shard_inode_ctx_t    *base_ictx         = NULL;
e7a346
-        gf_boolean_t          unlink_unref_forget = _gf_false;
e7a346
+        int                   unref_shard_inode = 0;
e7a346
 
e7a346
         this = THIS;
e7a346
         priv = this->private;
e7a346
@@ -2973,11 +2980,12 @@ shard_unlink_block_inode (shard_local_t *local, int shard_block_num)
e7a346
                         list_del_init (&ctx->ilist);
e7a346
                         priv->inode_count--;
e7a346
                         unref_base_inode++;
e7a346
+                        unref_shard_inode++;
e7a346
                         GF_ASSERT (priv->inode_count >= 0);
e7a346
-                        unlink_unref_forget = _gf_true;
e7a346
                 }
e7a346
                 if (ctx->fsync_needed) {
e7a346
                         unref_base_inode++;
e7a346
+                        unref_shard_inode++;
e7a346
                         list_del_init (&ctx->to_fsync_list);
e7a346
                         if (base_inode) {
e7a346
                                 __shard_inode_ctx_get (base_inode, this, &base_ictx);
e7a346
@@ -2988,11 +2996,11 @@ shard_unlink_block_inode (shard_local_t *local, int shard_block_num)
e7a346
         UNLOCK(&inode->lock);
e7a346
         if (base_inode)
e7a346
                 UNLOCK(&base_inode->lock);
e7a346
-        if (unlink_unref_forget) {
e7a346
-                inode_unlink (inode, priv->dot_shard_inode, block_bname);
e7a346
-                inode_unref (inode);
e7a346
-                inode_forget (inode, 0);
e7a346
-        }
e7a346
+
e7a346
+        inode_unlink(inode, priv->dot_shard_inode, block_bname);
e7a346
+        inode_ref_reduce_by_n(inode, unref_shard_inode);
e7a346
+        inode_forget(inode, 0);
e7a346
+
e7a346
         if (base_inode && unref_base_inode)
e7a346
                 inode_ref_reduce_by_n (base_inode, unref_base_inode);
e7a346
         UNLOCK(&priv->lock);
e7a346
@@ -5824,6 +5832,7 @@ shard_fsync_shards_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
e7a346
         shard_inode_ctx_t     *ctx         = NULL;
e7a346
         shard_inode_ctx_t     *base_ictx   = NULL;
e7a346
         inode_t               *base_inode  = NULL;
e7a346
+        gf_boolean_t           unref_shard_inode = _gf_false;
e7a346
 
e7a346
         local = frame->local;
e7a346
         base_inode = local->fd->inode;
e7a346
@@ -5858,11 +5867,15 @@ out:
e7a346
                                 list_add_tail (&ctx->to_fsync_list,
e7a346
                                                &base_ictx->to_fsync_list);
e7a346
                                 base_ictx->fsync_count++;
e7a346
+                        } else {
e7a346
+                            unref_shard_inode = _gf_true;
e7a346
                         }
e7a346
                 }
e7a346
                 UNLOCK (&anon_fd->inode->lock);
e7a346
                 UNLOCK (&base_inode->lock);
e7a346
         }
e7a346
+        if (unref_shard_inode)
e7a346
+                inode_unref(anon_fd->inode);
e7a346
         if (anon_fd)
e7a346
                 fd_unref (anon_fd);
e7a346
 
e7a346
-- 
e7a346
2.20.1
e7a346