From 90f7e532ea9ab8136d9f9bbd8e80d61390cd667e Mon Sep 17 00:00:00 2001
From: Poornima G <pgurusid@redhat.com>
Date: Mon, 21 Nov 2016 11:49:35 +0530
Subject: [PATCH 202/206] afr: Fix the EIO that can occur in afr_inode_refresh
as a result of cache invalidation(upcall).
Backport of http://review.gluster.org/15892
Conflicts:
xlators/cluster/afr/src/afr.h
Issue:
------
When a cache invalidation is received as a result of changing
pending xattr, the read_subvol is reset. Consider the below chain
of execution:
CHILD_DOWN
...
afr_readv
...
afr_inode_refresh
...
afr_inode_read_subvol_reset <- as a result of pending xattr set by
some other client GF_EVENT_UPCALL will
be sent
afr_refresh_done -> this results in an EIO, as the read subvol was
reset by the end of the afr_inode_refresh
Solution:
---------
When GF_EVENT_UPCALL is received, instead of resetting read_subvol,
set a variable need_refresh in inode_ctx. The next time someone
starts a txn, need_refresh also needs to be checked along with the
event gen.
>Reviewed-on: http://review.gluster.org/15892
>Reviewed-by: Ravishankar N <ravishankar@redhat.com>
>Smoke: Gluster Build System <jenkins@build.gluster.org>
>NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
>CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
>Reviewed-by: Pranith Kumar Karampuri <pkarampu@redhat.com>
Change-Id: Ifda21a7a8039b8874215e1afa4bdf20f7d991b58
BUG: 1393758
Signed-off-by: Poornima G <pgurusid@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/91495
Reviewed-by: Pranith Kumar Karampuri <pkarampu@redhat.com>
Tested-by: Pranith Kumar Karampuri <pkarampu@redhat.com>
---
xlators/cluster/afr/src/afr-common.c | 90 +++++++++++++++++++++++++------
xlators/cluster/afr/src/afr-read-txn.c | 3 +-
xlators/cluster/afr/src/afr-transaction.c | 4 +-
xlators/cluster/afr/src/afr.h | 4 ++
4 files changed, 83 insertions(+), 18 deletions(-)
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
index 2d9b496..1d1882f 100644
--- a/xlators/cluster/afr/src/afr-common.c
+++ b/xlators/cluster/afr/src/afr-common.c
@@ -595,6 +595,62 @@ out:
}
+/* Tell the caller whether it must run afr_inode_refresh: _gf_true when the
+ * inode ctx need_refresh flag was set (read and cleared under inode->lock)
+ * or when event_gen1 != event_gen2; otherwise _gf_false. */
+gf_boolean_t
+afr_is_inode_refresh_reqd (inode_t *inode, xlator_t *this,
+ int event_gen1, int event_gen2)
+{
+ gf_boolean_t need_refresh = _gf_false;
+ afr_inode_ctx_t *ctx = NULL;
+ int ret = -1;
+
+ GF_VALIDATE_OR_GOTO (this->name, inode, out); /* NOTE(review): presumably jumps to out (returning _gf_false) when inode is NULL - confirm */
+
+ LOCK(&inode->lock);
+ {
+ ret = __afr_inode_ctx_get (this, inode, &ctx);
+ if (ret)
+ goto unlock; /* no ctx: only the generation check below applies */
+
+ need_refresh = ctx->need_refresh;
+ /* Clear the flag on the assumption that the caller follows a
+ * _gf_true result with afr_inode_refresh. */
+ ctx->need_refresh = _gf_false;
+ }
+unlock:
+ UNLOCK(&inode->lock);
+
+ if (event_gen1 != event_gen2) /* a generation mismatch forces a refresh regardless of the flag */
+ need_refresh = _gf_true;
+out:
+ return need_refresh;
+}
+
+/* Set need_refresh in the inode ctx under inode->lock; returns the __afr_inode_ctx_get result, or -1 if that call is never reached. */
+static int
+afr_inode_need_refresh_set (inode_t *inode, xlator_t *this)
+{
+ int ret = -1;
+ afr_inode_ctx_t *ctx = NULL;
+
+ GF_VALIDATE_OR_GOTO (this->name, inode, out); /* NOTE(review): presumably jumps to out when inode is NULL - confirm */
+
+ LOCK(&inode->lock);
+ {
+ ret = __afr_inode_ctx_get (this, inode, &ctx);
+ if (ret)
+ goto unlock; /* flag left untouched; ret carries the failure */
+
+ ctx->need_refresh = _gf_true; /* consumed and cleared by afr_is_inode_refresh_reqd */
+ }
+unlock:
+ UNLOCK(&inode->lock);
+out:
+ return ret;
+}
+
int
afr_inode_read_subvol_reset (inode_t *inode, xlator_t *this)
{
@@ -2701,7 +2757,8 @@ afr_discover (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req
afr_read_subvol_get (loc->inode, this, NULL, NULL, &event,
AFR_DATA_TRANSACTION, NULL);
- if (event != local->event_generation)
+ if (afr_is_inode_refresh_reqd (loc->inode, this, event,
+ local->event_generation))
afr_inode_refresh (frame, this, loc->inode, NULL,
afr_discover_do);
else
@@ -2852,7 +2909,8 @@ afr_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req)
afr_read_subvol_get (loc->parent, this, NULL, NULL, &event,
AFR_DATA_TRANSACTION, NULL);
- if (event != local->event_generation)
+ if (afr_is_inode_refresh_reqd (loc->inode, this, event,
+ local->event_generation))
afr_inode_refresh (frame, this, loc->parent, NULL,
afr_lookup_do);
else
@@ -4627,22 +4685,22 @@ afr_notify (xlator_t *this, int32_t event,
* pronounced. Hence when a pending xattr is set notify
* all the md-cache clients to invalidate the existing
* stat cache and send the lookup next time */
- if (up_ci->dict) {
- for (i = 0; i < priv->child_count; i++) {
- if (dict_get (up_ci->dict, priv->pending_key[i])) {
- ret = dict_set_int8 (up_ci->dict,
- MDC_INVALIDATE_IATT , 0);
- break;
- }
+ if (!up_ci->dict)
+ break;
+ for (i = 0; i < priv->child_count; i++) {
+ if (dict_get (up_ci->dict, priv->pending_key[i])) {
+ ret = dict_set_int8 (up_ci->dict,
+ MDC_INVALIDATE_IATT, 0);
+ itable = ((xlator_t *)this->graph->top)->itable;
+ /*Internal processes may not have itable for top xlator*/
+ if (itable)
+ inode = inode_find (itable, up_data->gfid);
+ if (inode)
+ afr_inode_need_refresh_set (inode, this);
+
+ break;
}
}
- itable = ((xlator_t *)this->graph->top)->itable;
- /*Internal processes may not have itable for top xlator*/
- if (itable)
- inode = inode_find (itable, up_data->gfid);
- if (inode)
- afr_inode_read_subvol_reset (inode, this);
-
break;
default:
propagate = 1;
diff --git a/xlators/cluster/afr/src/afr-read-txn.c b/xlators/cluster/afr/src/afr-read-txn.c
index 1c7b4f9..fa98d37 100644
--- a/xlators/cluster/afr/src/afr-read-txn.c
+++ b/xlators/cluster/afr/src/afr-read-txn.c
@@ -221,7 +221,8 @@ afr_read_txn (call_frame_t *frame, xlator_t *this, inode_t *inode,
gf_msg_debug (this->name, 0, "%s: generation now vs cached: %d, "
"%d", uuid_utoa (inode->gfid), local->event_generation,
event_generation);
- if (local->event_generation != event_generation)
+ if (afr_is_inode_refresh_reqd (inode, this, local->event_generation,
+ event_generation))
/* servers have disconnected / reconnected, and possibly
rebooted, very likely changing the state of freshness
of copies */
diff --git a/xlators/cluster/afr/src/afr-transaction.c b/xlators/cluster/afr/src/afr-transaction.c
index 02a968e..62b680c 100644
--- a/xlators/cluster/afr/src/afr-transaction.c
+++ b/xlators/cluster/afr/src/afr-transaction.c
@@ -2575,7 +2575,9 @@ afr_transaction (call_frame_t *frame, xlator_t *this, afr_transaction_type type)
ret = afr_inode_get_readable (frame, local->inode, this,
local->readable, &event_generation, type);
- if (ret < 0 || event_generation != priv->event_generation) {
+ if (ret < 0 || afr_is_inode_refresh_reqd (local->inode, this,
+ priv->event_generation,
+ event_generation)) {
afr_inode_refresh (frame, this, local->inode, local->loc.gfid,
afr_write_txn_refresh_done);
} else {
diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h
index a9cab13..c84c3af 100644
--- a/xlators/cluster/afr/src/afr.h
+++ b/xlators/cluster/afr/src/afr.h
@@ -813,6 +813,7 @@ typedef struct _afr_inode_ctx {
uint64_t read_subvol;
int spb_choice;
gf_timer_t *timer;
+ gf_boolean_t need_refresh;
} afr_inode_ctx_t;
typedef struct afr_spbc_timeout {
@@ -1221,4 +1222,7 @@ afr_compound_cleanup (compound_args_t *args, dict_t *xdata,
int32_t
afr_quorum_errno (afr_private_t *priv);
+gf_boolean_t
+afr_is_inode_refresh_reqd (inode_t *inode, xlator_t *this,
+ int event_gen1, int event_gen2);
#endif /* __AFR_H__ */
--
2.9.3