3604df
From 374038e81ce8e6d0d08e9b74ec4c14a01393f9e6 Mon Sep 17 00:00:00 2001
3604df
From: Krutika Dhananjay <kdhananj@redhat.com>
3604df
Date: Tue, 17 May 2016 15:37:18 +0530
3604df
Subject: [PATCH 297/300] features/shard: Fix EIO error on add-brick
3604df
3604df
        Backport of: https://review.gluster.org/14419
3604df
3604df
DHT seems to link inode during lookup even before initializing
3604df
inode ctx with layout information, which comes after
3604df
directory healing.
3604df
3604df
Consider two parallel writes. As part of the first write,
3604df
shard sends lookup on .shard which in its return path would
3604df
cause DHT to link .shard inode. Now at this point, when a
3604df
second write is wound, inode_find() of .shard succeeds and
3604df
as a result of this, shard goes to create the participant
3604df
shards by issuing MKNODs under .shard. Since the layout is
3604df
yet to be initialized, mknod fails in dht call path with EIO,
3604df
leading to VM pauses.
3604df
3604df
The fix involves shard maintaining a flag to denote whether
3604df
a fresh lookup on .shard completed one network trip. If it
3604df
didn't, all inode_find()s in fop path will be followed by a
3604df
lookup before proceeding with the next stage of the fop.
3604df
3604df
Big thanks to Raghavendra G and Pranith Kumar K for the RCA
3604df
and subsequent inputs and feedback on the patch.
3604df
3604df
Change-Id: I467d0790c1b23cfcacb5c8b5470bb972fb6173d4
3604df
BUG: 1412554
3604df
Signed-off-by: Krutika Dhananjay <kdhananj@redhat.com>
3604df
Reviewed-on: https://code.engineering.redhat.com/gerrit/98554
3604df
Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
3604df
---
3604df
 xlators/features/shard/src/shard.c | 172 +++++++++++++++++++++++++++++++++----
3604df
 xlators/features/shard/src/shard.h |   1 +
3604df
 2 files changed, 154 insertions(+), 19 deletions(-)
3604df
3604df
diff --git a/xlators/features/shard/src/shard.c b/xlators/features/shard/src/shard.c
3604df
index 59a0cfa..39d648b 100644
3604df
--- a/xlators/features/shard/src/shard.c
3604df
+++ b/xlators/features/shard/src/shard.c
3604df
@@ -178,6 +178,63 @@ shard_inode_ctx_set (inode_t *inode, xlator_t *this, struct iatt *stbuf,
3604df
 }
3604df
 
3604df
 int
3604df
+__shard_inode_ctx_set_refreshed_flag (inode_t *inode, xlator_t *this)
3604df
+{
3604df
+        int                 ret = -1;
3604df
+        shard_inode_ctx_t  *ctx = NULL;
3604df
+
3604df
+        ret = __shard_inode_ctx_get (inode, this, &ctx;;
3604df
+        if (ret)
3604df
+                return ret;
3604df
+
3604df
+        ctx->refreshed = _gf_true;
3604df
+        return 0;
3604df
+}
3604df
+
3604df
+int
3604df
+shard_inode_ctx_set_refreshed_flag (inode_t *inode, xlator_t *this)
3604df
+{
3604df
+        int ret = -1;
3604df
+
3604df
+        LOCK (&inode->lock);
3604df
+        {
3604df
+                ret = __shard_inode_ctx_set_refreshed_flag (inode, this);
3604df
+        }
3604df
+        UNLOCK (&inode->lock);
3604df
+
3604df
+        return ret;
3604df
+}
3604df
+
3604df
+gf_boolean_t
3604df
+__shard_inode_ctx_needs_lookup (inode_t *inode, xlator_t *this)
3604df
+{
3604df
+        int                 ret = -1;
3604df
+        shard_inode_ctx_t  *ctx = NULL;
3604df
+
3604df
+        ret = __shard_inode_ctx_get (inode, this, &ctx;;
3604df
+        /* If inode ctx get fails, better to err on the side of caution and
3604df
+         * try again? Unless the failure is due to mem-allocation.
3604df
+         */
3604df
+        if (ret)
3604df
+                return _gf_true;
3604df
+
3604df
+        return !ctx->refreshed;
3604df
+}
3604df
+
3604df
+gf_boolean_t
3604df
+shard_inode_ctx_needs_lookup (inode_t *inode, xlator_t *this)
3604df
+{
3604df
+        gf_boolean_t flag = _gf_false;
3604df
+
3604df
+        LOCK (&inode->lock);
3604df
+        {
3604df
+                flag = __shard_inode_ctx_needs_lookup (inode, this);
3604df
+        }
3604df
+        UNLOCK (&inode->lock);
3604df
+
3604df
+        return flag;
3604df
+}
3604df
+int
3604df
 __shard_inode_ctx_invalidate (inode_t *inode, xlator_t *this, struct iatt *stbuf)
3604df
 {
3604df
         int                 ret = -1;
3604df
@@ -756,7 +813,7 @@ out:
3604df
 
3604df
 }
3604df
 
3604df
-static void
3604df
+static inode_t *
3604df
 shard_link_dot_shard_inode (shard_local_t *local, inode_t *inode,
3604df
                             struct iatt *buf)
3604df
 {
3604df
@@ -765,10 +822,72 @@ shard_link_dot_shard_inode (shard_local_t *local, inode_t *inode,
3604df
 
3604df
         priv = THIS->private;
3604df
 
3604df
-        linked_inode = inode_link (inode, local->dot_shard_loc.parent,
3604df
-                                   local->dot_shard_loc.name, buf);
3604df
+        linked_inode = inode_link (inode, inode->table->root, ".shard", buf);
3604df
         inode_lookup (linked_inode);
3604df
         priv->dot_shard_inode = linked_inode;
3604df
+        return linked_inode;
3604df
+}
3604df
+
3604df
+
3604df
+int
3604df
+shard_refresh_dot_shard_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
3604df
+                             int32_t op_ret, int32_t op_errno, inode_t *inode,
3604df
+                             struct iatt *buf, dict_t *xdata,
3604df
+                             struct iatt *postparent)
3604df
+{
3604df
+        shard_local_t *local = NULL;
3604df
+
3604df
+        local = frame->local;
3604df
+
3604df
+        if (op_ret) {
3604df
+                local->op_ret = op_ret;
3604df
+                local->op_errno = op_errno;
3604df
+                goto out;
3604df
+        }
3604df
+
3604df
+        /* To-Do: Fix refcount increment per call to
3604df
+         * shard_link_dot_shard_inode().
3604df
+         */
3604df
+        shard_link_dot_shard_inode (local, inode, buf);
3604df
+        shard_inode_ctx_set_refreshed_flag (inode, this);
3604df
+out:
3604df
+        shard_common_resolve_shards (frame, this, local->post_res_handler);
3604df
+        return 0;
3604df
+}
3604df
+
3604df
+int
3604df
+shard_refresh_dot_shard (call_frame_t *frame, xlator_t *this)
3604df
+{
3604df
+        loc_t          loc       = {0,};
3604df
+        inode_t       *inode     = NULL;
3604df
+        shard_priv_t  *priv      = NULL;
3604df
+        shard_local_t *local     = NULL;
3604df
+
3604df
+        local = frame->local;
3604df
+        priv = this->private;
3604df
+
3604df
+        inode = inode_find (this->itable, priv->dot_shard_gfid);
3604df
+
3604df
+        if (!shard_inode_ctx_needs_lookup (inode, this)) {
3604df
+                local->op_ret = 0;
3604df
+                goto out;
3604df
+        }
3604df
+
3604df
+        /* Plain assignment because the ref is already taken above through
3604df
+         * call to inode_find()
3604df
+         */
3604df
+        loc.inode = inode;
3604df
+        gf_uuid_copy (loc.gfid, priv->dot_shard_gfid);
3604df
+
3604df
+        STACK_WIND (frame, shard_refresh_dot_shard_cbk, FIRST_CHILD(this),
3604df
+                    FIRST_CHILD(this)->fops->lookup, &loc, NULL);
3604df
+        loc_wipe (&loc;;
3604df
+
3604df
+        return 0;
3604df
+
3604df
+out:
3604df
+        shard_common_resolve_shards (frame, this, local->post_res_handler);
3604df
+        return 0;
3604df
 }
3604df
 
3604df
 int
3604df
@@ -777,7 +896,8 @@ shard_lookup_dot_shard_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
3604df
                             struct iatt *buf, dict_t *xdata,
3604df
                             struct iatt *postparent)
3604df
 {
3604df
-        shard_local_t *local = NULL;
3604df
+        inode_t       *link_inode = NULL;
3604df
+        shard_local_t *local      = NULL;
3604df
 
3604df
         local = frame->local;
3604df
 
3604df
@@ -797,8 +917,14 @@ shard_lookup_dot_shard_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
3604df
                 goto unwind;
3604df
         }
3604df
 
3604df
-        shard_link_dot_shard_inode (local, inode, buf);
3604df
-        shard_common_resolve_shards (frame, this, local->post_res_handler);
3604df
+        link_inode = shard_link_dot_shard_inode (local, inode, buf);
3604df
+        if (link_inode != inode) {
3604df
+                shard_refresh_dot_shard (frame, this);
3604df
+        } else {
3604df
+                shard_inode_ctx_set_refreshed_flag (link_inode, this);
3604df
+                shard_common_resolve_shards (frame, this,
3604df
+                                             local->post_res_handler);
3604df
+        }
3604df
         return 0;
3604df
 
3604df
 unwind:
3604df
@@ -1843,8 +1969,8 @@ shard_truncate_begin (call_frame_t *frame, xlator_t *this)
3604df
                 shard_lookup_dot_shard (frame, this,
3604df
                                         shard_post_resolve_truncate_handler);
3604df
         } else {
3604df
-                shard_common_resolve_shards (frame, this,
3604df
-                                           shard_post_resolve_truncate_handler);
3604df
+                local->post_res_handler = shard_post_resolve_truncate_handler;
3604df
+                shard_refresh_dot_shard (frame, this);
3604df
         }
3604df
         return 0;
3604df
 
3604df
@@ -2273,8 +2399,8 @@ shard_unlink_base_file_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
3604df
                 shard_lookup_dot_shard (frame, this,
3604df
                                         shard_post_resolve_unlink_handler);
3604df
         } else {
3604df
-                shard_common_resolve_shards (frame, this,
3604df
-                                             shard_post_resolve_unlink_handler);
3604df
+                local->post_res_handler = shard_post_resolve_unlink_handler;
3604df
+                shard_refresh_dot_shard (frame, this);
3604df
         }
3604df
 
3604df
         return 0;
3604df
@@ -2624,8 +2750,8 @@ shard_rename_unlink_dst_shards_do (call_frame_t *frame, xlator_t *this)
3604df
                 shard_lookup_dot_shard (frame, this,
3604df
                                         shard_post_resolve_unlink_handler);
3604df
         } else {
3604df
-                shard_common_resolve_shards (frame, this,
3604df
-                                             shard_post_resolve_unlink_handler);
3604df
+                local->post_res_handler = shard_post_resolve_unlink_handler;
3604df
+                shard_refresh_dot_shard (frame, this);
3604df
         }
3604df
 
3604df
         return 0;
3604df
@@ -3388,8 +3514,8 @@ shard_post_lookup_readv_handler (call_frame_t *frame, xlator_t *this)
3604df
                 shard_lookup_dot_shard (frame, this,
3604df
                                         shard_post_resolve_readv_handler);
3604df
         } else {
3604df
-                shard_common_resolve_shards (frame, this,
3604df
-                                             shard_post_resolve_readv_handler);
3604df
+                local->post_res_handler = shard_post_resolve_readv_handler;
3604df
+                shard_refresh_dot_shard (frame, this);
3604df
         }
3604df
         return 0;
3604df
 
3604df
@@ -3849,7 +3975,8 @@ shard_mkdir_dot_shard_cbk (call_frame_t *frame, void *cookie,
3604df
                                        struct iatt *buf, struct iatt *preparent,
3604df
                                        struct iatt *postparent, dict_t *xdata)
3604df
 {
3604df
-        shard_local_t *local = NULL;
3604df
+        inode_t       *link_inode = NULL;
3604df
+        shard_local_t *local      = NULL;
3604df
 
3604df
         local = frame->local;
3604df
 
3604df
@@ -3869,8 +3996,15 @@ shard_mkdir_dot_shard_cbk (call_frame_t *frame, void *cookie,
3604df
                 }
3604df
         }
3604df
 
3604df
-        shard_link_dot_shard_inode (local, inode, buf);
3604df
-
3604df
+        link_inode = shard_link_dot_shard_inode (local, inode, buf);
3604df
+        if (link_inode != inode) {
3604df
+                shard_refresh_dot_shard (frame, this);
3604df
+        } else {
3604df
+                shard_inode_ctx_set_refreshed_flag (link_inode, this);
3604df
+                shard_common_resolve_shards (frame, this,
3604df
+                                             local->post_res_handler);
3604df
+        }
3604df
+        return 0;
3604df
 unwind:
3604df
         shard_common_resolve_shards (frame, this, local->post_res_handler);
3604df
         return 0;
3604df
@@ -4638,8 +4772,8 @@ shard_common_inode_write_begin (call_frame_t *frame, xlator_t *this,
3604df
                 shard_mkdir_dot_shard (frame, this,
3604df
                                  shard_common_inode_write_post_resolve_handler);
3604df
         } else {
3604df
-                shard_common_resolve_shards (frame, this,
3604df
-                                 shard_common_inode_write_post_resolve_handler);
3604df
+                local->post_res_handler = shard_common_inode_write_post_resolve_handler;
3604df
+                shard_refresh_dot_shard (frame, this);
3604df
         }
3604df
 
3604df
         return 0;
3604df
diff --git a/xlators/features/shard/src/shard.h b/xlators/features/shard/src/shard.h
3604df
index f626fae..09232a4 100644
3604df
--- a/xlators/features/shard/src/shard.h
3604df
+++ b/xlators/features/shard/src/shard.h
3604df
@@ -268,6 +268,7 @@ typedef struct shard_inode_ctx {
3604df
         struct list_head ilist;
3604df
         uuid_t base_gfid;
3604df
         int block_num;
3604df
+        gf_boolean_t refreshed;
3604df
 } shard_inode_ctx_t;
3604df
 
3604df
 #endif /* __SHARD_H__ */
3604df
-- 
3604df
2.9.3
3604df