|
|
3604df |
From 374038e81ce8e6d0d08e9b74ec4c14a01393f9e6 Mon Sep 17 00:00:00 2001
|
|
|
3604df |
From: Krutika Dhananjay <kdhananj@redhat.com>
|
|
|
3604df |
Date: Tue, 17 May 2016 15:37:18 +0530
|
|
|
3604df |
Subject: [PATCH 297/300] features/shard: Fix EIO error on add-brick
|
|
|
3604df |
|
|
|
3604df |
Backport of: https://review.gluster.org/14419
|
|
|
3604df |
|
|
|
3604df |
DHT seems to link inode during lookup even before initializing
|
|
|
3604df |
inode ctx with layout information, which comes after
|
|
|
3604df |
directory healing.
|
|
|
3604df |
|
|
|
3604df |
Consider two parallel writes. As part of the first write,
|
|
|
3604df |
shard sends lookup on .shard which in its return path would
|
|
|
3604df |
cause DHT to link .shard inode. Now at this point, when a
|
|
|
3604df |
second write is wound, inode_find() of .shard succeeds and
|
|
|
3604df |
as a result of this, shard goes to create the participant
|
|
|
3604df |
shards by issuing MKNODs under .shard. Since the layout is
|
|
|
3604df |
yet to be initialized, mknod fails in dht call path with EIO,
|
|
|
3604df |
leading to VM pauses.
|
|
|
3604df |
|
|
|
3604df |
The fix involves shard maintaining a flag to denote whether
|
|
|
3604df |
a fresh lookup on .shard completed one network trip. If it
|
|
|
3604df |
didn't, all inode_find()s in fop path will be followed by a
|
|
|
3604df |
lookup before proceeding with the next stage of the fop.
|
|
|
3604df |
|
|
|
3604df |
Big thanks to Raghavendra G and Pranith Kumar K for the RCA
|
|
|
3604df |
and subsequent inputs and feedback on the patch.
|
|
|
3604df |
|
|
|
3604df |
Change-Id: I467d0790c1b23cfcacb5c8b5470bb972fb6173d4
|
|
|
3604df |
BUG: 1412554
|
|
|
3604df |
Signed-off-by: Krutika Dhananjay <kdhananj@redhat.com>
|
|
|
3604df |
Reviewed-on: https://code.engineering.redhat.com/gerrit/98554
|
|
|
3604df |
Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
|
|
|
3604df |
---
|
|
|
3604df |
xlators/features/shard/src/shard.c | 172 +++++++++++++++++++++++++++++++++----
|
|
|
3604df |
xlators/features/shard/src/shard.h | 1 +
|
|
|
3604df |
2 files changed, 154 insertions(+), 19 deletions(-)
|
|
|
3604df |
|
|
|
3604df |
diff --git a/xlators/features/shard/src/shard.c b/xlators/features/shard/src/shard.c
|
|
|
3604df |
index 59a0cfa..39d648b 100644
|
|
|
3604df |
--- a/xlators/features/shard/src/shard.c
|
|
|
3604df |
+++ b/xlators/features/shard/src/shard.c
|
|
|
3604df |
@@ -178,6 +178,63 @@ shard_inode_ctx_set (inode_t *inode, xlator_t *this, struct iatt *stbuf,
|
|
|
3604df |
}
|
|
|
3604df |
|
|
|
3604df |
int
|
|
|
3604df |
+__shard_inode_ctx_set_refreshed_flag (inode_t *inode, xlator_t *this)
|
|
|
3604df |
+{
|
|
|
3604df |
+ int ret = -1;
|
|
|
3604df |
+ shard_inode_ctx_t *ctx = NULL;
|
|
|
3604df |
+
|
|
|
3604df |
+ ret = __shard_inode_ctx_get (inode, this, &ctx;;
|
|
|
3604df |
+ if (ret)
|
|
|
3604df |
+ return ret;
|
|
|
3604df |
+
|
|
|
3604df |
+ ctx->refreshed = _gf_true;
|
|
|
3604df |
+ return 0;
|
|
|
3604df |
+}
|
|
|
3604df |
+
|
|
|
3604df |
+int
|
|
|
3604df |
+shard_inode_ctx_set_refreshed_flag (inode_t *inode, xlator_t *this)
|
|
|
3604df |
+{
|
|
|
3604df |
+ int ret = -1;
|
|
|
3604df |
+
|
|
|
3604df |
+ LOCK (&inode->lock);
|
|
|
3604df |
+ {
|
|
|
3604df |
+ ret = __shard_inode_ctx_set_refreshed_flag (inode, this);
|
|
|
3604df |
+ }
|
|
|
3604df |
+ UNLOCK (&inode->lock);
|
|
|
3604df |
+
|
|
|
3604df |
+ return ret;
|
|
|
3604df |
+}
|
|
|
3604df |
+
|
|
|
3604df |
+gf_boolean_t
|
|
|
3604df |
+__shard_inode_ctx_needs_lookup (inode_t *inode, xlator_t *this)
|
|
|
3604df |
+{
|
|
|
3604df |
+ int ret = -1;
|
|
|
3604df |
+ shard_inode_ctx_t *ctx = NULL;
|
|
|
3604df |
+
|
|
|
3604df |
+ ret = __shard_inode_ctx_get (inode, this, &ctx;;
|
|
|
3604df |
+ /* If inode ctx get fails, better to err on the side of caution and
|
|
|
3604df |
+ * try again? Unless the failure is due to mem-allocation.
|
|
|
3604df |
+ */
|
|
|
3604df |
+ if (ret)
|
|
|
3604df |
+ return _gf_true;
|
|
|
3604df |
+
|
|
|
3604df |
+ return !ctx->refreshed;
|
|
|
3604df |
+}
|
|
|
3604df |
+
|
|
|
3604df |
+gf_boolean_t
|
|
|
3604df |
+shard_inode_ctx_needs_lookup (inode_t *inode, xlator_t *this)
|
|
|
3604df |
+{
|
|
|
3604df |
+ gf_boolean_t flag = _gf_false;
|
|
|
3604df |
+
|
|
|
3604df |
+ LOCK (&inode->lock);
|
|
|
3604df |
+ {
|
|
|
3604df |
+ flag = __shard_inode_ctx_needs_lookup (inode, this);
|
|
|
3604df |
+ }
|
|
|
3604df |
+ UNLOCK (&inode->lock);
|
|
|
3604df |
+
|
|
|
3604df |
+ return flag;
|
|
|
3604df |
+}
|
|
|
3604df |
+int
|
|
|
3604df |
__shard_inode_ctx_invalidate (inode_t *inode, xlator_t *this, struct iatt *stbuf)
|
|
|
3604df |
{
|
|
|
3604df |
int ret = -1;
|
|
|
3604df |
@@ -756,7 +813,7 @@ out:
|
|
|
3604df |
|
|
|
3604df |
}
|
|
|
3604df |
|
|
|
3604df |
-static void
|
|
|
3604df |
+static inode_t *
|
|
|
3604df |
shard_link_dot_shard_inode (shard_local_t *local, inode_t *inode,
|
|
|
3604df |
struct iatt *buf)
|
|
|
3604df |
{
|
|
|
3604df |
@@ -765,10 +822,72 @@ shard_link_dot_shard_inode (shard_local_t *local, inode_t *inode,
|
|
|
3604df |
|
|
|
3604df |
priv = THIS->private;
|
|
|
3604df |
|
|
|
3604df |
- linked_inode = inode_link (inode, local->dot_shard_loc.parent,
|
|
|
3604df |
- local->dot_shard_loc.name, buf);
|
|
|
3604df |
+ linked_inode = inode_link (inode, inode->table->root, ".shard", buf);
|
|
|
3604df |
inode_lookup (linked_inode);
|
|
|
3604df |
priv->dot_shard_inode = linked_inode;
|
|
|
3604df |
+ return linked_inode;
|
|
|
3604df |
+}
|
|
|
3604df |
+
|
|
|
3604df |
+
|
|
|
3604df |
+int
|
|
|
3604df |
+shard_refresh_dot_shard_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
|
|
|
3604df |
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
|
|
|
3604df |
+ struct iatt *buf, dict_t *xdata,
|
|
|
3604df |
+ struct iatt *postparent)
|
|
|
3604df |
+{
|
|
|
3604df |
+ shard_local_t *local = NULL;
|
|
|
3604df |
+
|
|
|
3604df |
+ local = frame->local;
|
|
|
3604df |
+
|
|
|
3604df |
+ if (op_ret) {
|
|
|
3604df |
+ local->op_ret = op_ret;
|
|
|
3604df |
+ local->op_errno = op_errno;
|
|
|
3604df |
+ goto out;
|
|
|
3604df |
+ }
|
|
|
3604df |
+
|
|
|
3604df |
+ /* To-Do: Fix refcount increment per call to
|
|
|
3604df |
+ * shard_link_dot_shard_inode().
|
|
|
3604df |
+ */
|
|
|
3604df |
+ shard_link_dot_shard_inode (local, inode, buf);
|
|
|
3604df |
+ shard_inode_ctx_set_refreshed_flag (inode, this);
|
|
|
3604df |
+out:
|
|
|
3604df |
+ shard_common_resolve_shards (frame, this, local->post_res_handler);
|
|
|
3604df |
+ return 0;
|
|
|
3604df |
+}
|
|
|
3604df |
+
|
|
|
3604df |
+int
|
|
|
3604df |
+shard_refresh_dot_shard (call_frame_t *frame, xlator_t *this)
|
|
|
3604df |
+{
|
|
|
3604df |
+ loc_t loc = {0,};
|
|
|
3604df |
+ inode_t *inode = NULL;
|
|
|
3604df |
+ shard_priv_t *priv = NULL;
|
|
|
3604df |
+ shard_local_t *local = NULL;
|
|
|
3604df |
+
|
|
|
3604df |
+ local = frame->local;
|
|
|
3604df |
+ priv = this->private;
|
|
|
3604df |
+
|
|
|
3604df |
+ inode = inode_find (this->itable, priv->dot_shard_gfid);
|
|
|
3604df |
+
|
|
|
3604df |
+ if (!shard_inode_ctx_needs_lookup (inode, this)) {
|
|
|
3604df |
+ local->op_ret = 0;
|
|
|
3604df |
+ goto out;
|
|
|
3604df |
+ }
|
|
|
3604df |
+
|
|
|
3604df |
+ /* Plain assignment because the ref is already taken above through
|
|
|
3604df |
+ * call to inode_find()
|
|
|
3604df |
+ */
|
|
|
3604df |
+ loc.inode = inode;
|
|
|
3604df |
+ gf_uuid_copy (loc.gfid, priv->dot_shard_gfid);
|
|
|
3604df |
+
|
|
|
3604df |
+ STACK_WIND (frame, shard_refresh_dot_shard_cbk, FIRST_CHILD(this),
|
|
|
3604df |
+ FIRST_CHILD(this)->fops->lookup, &loc, NULL);
|
|
|
3604df |
+ loc_wipe (&loc;;
|
|
|
3604df |
+
|
|
|
3604df |
+ return 0;
|
|
|
3604df |
+
|
|
|
3604df |
+out:
|
|
|
3604df |
+ shard_common_resolve_shards (frame, this, local->post_res_handler);
|
|
|
3604df |
+ return 0;
|
|
|
3604df |
}
|
|
|
3604df |
|
|
|
3604df |
int
|
|
|
3604df |
@@ -777,7 +896,8 @@ shard_lookup_dot_shard_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
|
|
|
3604df |
struct iatt *buf, dict_t *xdata,
|
|
|
3604df |
struct iatt *postparent)
|
|
|
3604df |
{
|
|
|
3604df |
- shard_local_t *local = NULL;
|
|
|
3604df |
+ inode_t *link_inode = NULL;
|
|
|
3604df |
+ shard_local_t *local = NULL;
|
|
|
3604df |
|
|
|
3604df |
local = frame->local;
|
|
|
3604df |
|
|
|
3604df |
@@ -797,8 +917,14 @@ shard_lookup_dot_shard_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
|
|
|
3604df |
goto unwind;
|
|
|
3604df |
}
|
|
|
3604df |
|
|
|
3604df |
- shard_link_dot_shard_inode (local, inode, buf);
|
|
|
3604df |
- shard_common_resolve_shards (frame, this, local->post_res_handler);
|
|
|
3604df |
+ link_inode = shard_link_dot_shard_inode (local, inode, buf);
|
|
|
3604df |
+ if (link_inode != inode) {
|
|
|
3604df |
+ shard_refresh_dot_shard (frame, this);
|
|
|
3604df |
+ } else {
|
|
|
3604df |
+ shard_inode_ctx_set_refreshed_flag (link_inode, this);
|
|
|
3604df |
+ shard_common_resolve_shards (frame, this,
|
|
|
3604df |
+ local->post_res_handler);
|
|
|
3604df |
+ }
|
|
|
3604df |
return 0;
|
|
|
3604df |
|
|
|
3604df |
unwind:
|
|
|
3604df |
@@ -1843,8 +1969,8 @@ shard_truncate_begin (call_frame_t *frame, xlator_t *this)
|
|
|
3604df |
shard_lookup_dot_shard (frame, this,
|
|
|
3604df |
shard_post_resolve_truncate_handler);
|
|
|
3604df |
} else {
|
|
|
3604df |
- shard_common_resolve_shards (frame, this,
|
|
|
3604df |
- shard_post_resolve_truncate_handler);
|
|
|
3604df |
+ local->post_res_handler = shard_post_resolve_truncate_handler;
|
|
|
3604df |
+ shard_refresh_dot_shard (frame, this);
|
|
|
3604df |
}
|
|
|
3604df |
return 0;
|
|
|
3604df |
|
|
|
3604df |
@@ -2273,8 +2399,8 @@ shard_unlink_base_file_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
|
|
|
3604df |
shard_lookup_dot_shard (frame, this,
|
|
|
3604df |
shard_post_resolve_unlink_handler);
|
|
|
3604df |
} else {
|
|
|
3604df |
- shard_common_resolve_shards (frame, this,
|
|
|
3604df |
- shard_post_resolve_unlink_handler);
|
|
|
3604df |
+ local->post_res_handler = shard_post_resolve_unlink_handler;
|
|
|
3604df |
+ shard_refresh_dot_shard (frame, this);
|
|
|
3604df |
}
|
|
|
3604df |
|
|
|
3604df |
return 0;
|
|
|
3604df |
@@ -2624,8 +2750,8 @@ shard_rename_unlink_dst_shards_do (call_frame_t *frame, xlator_t *this)
|
|
|
3604df |
shard_lookup_dot_shard (frame, this,
|
|
|
3604df |
shard_post_resolve_unlink_handler);
|
|
|
3604df |
} else {
|
|
|
3604df |
- shard_common_resolve_shards (frame, this,
|
|
|
3604df |
- shard_post_resolve_unlink_handler);
|
|
|
3604df |
+ local->post_res_handler = shard_post_resolve_unlink_handler;
|
|
|
3604df |
+ shard_refresh_dot_shard (frame, this);
|
|
|
3604df |
}
|
|
|
3604df |
|
|
|
3604df |
return 0;
|
|
|
3604df |
@@ -3388,8 +3514,8 @@ shard_post_lookup_readv_handler (call_frame_t *frame, xlator_t *this)
|
|
|
3604df |
shard_lookup_dot_shard (frame, this,
|
|
|
3604df |
shard_post_resolve_readv_handler);
|
|
|
3604df |
} else {
|
|
|
3604df |
- shard_common_resolve_shards (frame, this,
|
|
|
3604df |
- shard_post_resolve_readv_handler);
|
|
|
3604df |
+ local->post_res_handler = shard_post_resolve_readv_handler;
|
|
|
3604df |
+ shard_refresh_dot_shard (frame, this);
|
|
|
3604df |
}
|
|
|
3604df |
return 0;
|
|
|
3604df |
|
|
|
3604df |
@@ -3849,7 +3975,8 @@ shard_mkdir_dot_shard_cbk (call_frame_t *frame, void *cookie,
|
|
|
3604df |
struct iatt *buf, struct iatt *preparent,
|
|
|
3604df |
struct iatt *postparent, dict_t *xdata)
|
|
|
3604df |
{
|
|
|
3604df |
- shard_local_t *local = NULL;
|
|
|
3604df |
+ inode_t *link_inode = NULL;
|
|
|
3604df |
+ shard_local_t *local = NULL;
|
|
|
3604df |
|
|
|
3604df |
local = frame->local;
|
|
|
3604df |
|
|
|
3604df |
@@ -3869,8 +3996,15 @@ shard_mkdir_dot_shard_cbk (call_frame_t *frame, void *cookie,
|
|
|
3604df |
}
|
|
|
3604df |
}
|
|
|
3604df |
|
|
|
3604df |
- shard_link_dot_shard_inode (local, inode, buf);
|
|
|
3604df |
-
|
|
|
3604df |
+ link_inode = shard_link_dot_shard_inode (local, inode, buf);
|
|
|
3604df |
+ if (link_inode != inode) {
|
|
|
3604df |
+ shard_refresh_dot_shard (frame, this);
|
|
|
3604df |
+ } else {
|
|
|
3604df |
+ shard_inode_ctx_set_refreshed_flag (link_inode, this);
|
|
|
3604df |
+ shard_common_resolve_shards (frame, this,
|
|
|
3604df |
+ local->post_res_handler);
|
|
|
3604df |
+ }
|
|
|
3604df |
+ return 0;
|
|
|
3604df |
unwind:
|
|
|
3604df |
shard_common_resolve_shards (frame, this, local->post_res_handler);
|
|
|
3604df |
return 0;
|
|
|
3604df |
@@ -4638,8 +4772,8 @@ shard_common_inode_write_begin (call_frame_t *frame, xlator_t *this,
|
|
|
3604df |
shard_mkdir_dot_shard (frame, this,
|
|
|
3604df |
shard_common_inode_write_post_resolve_handler);
|
|
|
3604df |
} else {
|
|
|
3604df |
- shard_common_resolve_shards (frame, this,
|
|
|
3604df |
- shard_common_inode_write_post_resolve_handler);
|
|
|
3604df |
+ local->post_res_handler = shard_common_inode_write_post_resolve_handler;
|
|
|
3604df |
+ shard_refresh_dot_shard (frame, this);
|
|
|
3604df |
}
|
|
|
3604df |
|
|
|
3604df |
return 0;
|
|
|
3604df |
diff --git a/xlators/features/shard/src/shard.h b/xlators/features/shard/src/shard.h
|
|
|
3604df |
index f626fae..09232a4 100644
|
|
|
3604df |
--- a/xlators/features/shard/src/shard.h
|
|
|
3604df |
+++ b/xlators/features/shard/src/shard.h
|
|
|
3604df |
@@ -268,6 +268,7 @@ typedef struct shard_inode_ctx {
|
|
|
3604df |
struct list_head ilist;
|
|
|
3604df |
uuid_t base_gfid;
|
|
|
3604df |
int block_num;
|
|
|
3604df |
+ gf_boolean_t refreshed;
|
|
|
3604df |
} shard_inode_ctx_t;
|
|
|
3604df |
|
|
|
3604df |
#endif /* __SHARD_H__ */
|
|
|
3604df |
--
|
|
|
3604df |
2.9.3
|
|
|
3604df |
|