3604df
From 90f7e532ea9ab8136d9f9bbd8e80d61390cd667e Mon Sep 17 00:00:00 2001
3604df
From: Poornima G <pgurusid@redhat.com>
3604df
Date: Mon, 21 Nov 2016 11:49:35 +0530
3604df
Subject: [PATCH 202/206] afr: Fix the EIO that can occur in afr_inode_refresh
3604df
 as a result      of cache invalidation(upcall).
3604df
3604df
Backport of http://review.gluster.org/15892
3604df
Conflicts:
3604df
        xlators/cluster/afr/src/afr.h
3604df
3604df
Issue:
3604df
------
3604df
When a cache invalidation is received as a result of changing
3604df
pending xattr, the read_subvol is reset. Consider the below chain
3604df
of execution:
3604df
3604df
CHILD_DOWN
3604df
...
3604df
afr_readv
3604df
...
3604df
afr_inode_refresh
3604df
...
3604df
afr_inode_read_subvol_reset <- as a result of pending xattr set by
3604df
                               some other client GF_EVENT_UPCALL will
3604df
                               be sent
3604df
afr_refresh_done -> this results in an EIO, as the read subvol was
3604df
                    reset by the end of the afr_inode_refresh
3604df
3604df
Solution:
3604df
---------
3604df
When GF_EVENT_UPCALL is received, instead of resetting read_subvol,
3604df
set a variable need_refresh in inode_ctx, the next time someone
3604df
starts a txn, along with event gen, need_refresh also needs to
3604df
be checked.
3604df
3604df
>Reviewed-on: http://review.gluster.org/15892
3604df
>Reviewed-by: Ravishankar N <ravishankar@redhat.com>
3604df
>Smoke: Gluster Build System <jenkins@build.gluster.org>
3604df
>NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
3604df
>CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
3604df
>Reviewed-by: Pranith Kumar Karampuri <pkarampu@redhat.com>
3604df
3604df
Change-Id: Ifda21a7a8039b8874215e1afa4bdf20f7d991b58
3604df
BUG: 1393758
3604df
Signed-off-by: Poornima G <pgurusid@redhat.com>
3604df
Reviewed-on: https://code.engineering.redhat.com/gerrit/91495
3604df
Reviewed-by: Pranith Kumar Karampuri <pkarampu@redhat.com>
3604df
Tested-by: Pranith Kumar Karampuri <pkarampu@redhat.com>
3604df
---
3604df
 xlators/cluster/afr/src/afr-common.c      | 90 +++++++++++++++++++++++++------
3604df
 xlators/cluster/afr/src/afr-read-txn.c    |  3 +-
3604df
 xlators/cluster/afr/src/afr-transaction.c |  4 +-
3604df
 xlators/cluster/afr/src/afr.h             |  4 ++
3604df
 4 files changed, 83 insertions(+), 18 deletions(-)
3604df
3604df
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
3604df
index 2d9b496..1d1882f 100644
3604df
--- a/xlators/cluster/afr/src/afr-common.c
3604df
+++ b/xlators/cluster/afr/src/afr-common.c
3604df
@@ -595,6 +595,62 @@ out:
3604df
 }
3604df
 
3604df
 
3604df
+/* Returns _gf_true when the inode's need_refresh flag was set (the flag is
3604df
+ * cleared here) or when event_gen1 != event_gen2; the caller should then
3604df
+ * perform afr_inode_refresh. */
3604df
+gf_boolean_t
3604df
+afr_is_inode_refresh_reqd (inode_t *inode, xlator_t *this,
3604df
+                           int event_gen1, int event_gen2)
3604df
+{
3604df
+        gf_boolean_t     need_refresh = _gf_false;
3604df
+        afr_inode_ctx_t  *ctx         = NULL;
3604df
+        int              ret          = -1;
3604df
+
3604df
+        GF_VALIDATE_OR_GOTO (this->name, inode, out);
3604df
+
3604df
+        LOCK(&inode->lock);
3604df
+        {
3604df
+                ret = __afr_inode_ctx_get (this, inode, &ctx);
3604df
+                if (ret)
3604df
+                        goto unlock;
3604df
+
3604df
+                need_refresh = ctx->need_refresh;
3604df
+                /* The caller is expected to run afr_inode_refresh after
3604df
+                 * this call, so consume the flag by resetting it here */
3604df
+                ctx->need_refresh = _gf_false;
3604df
+        }
3604df
+unlock:
3604df
+        UNLOCK(&inode->lock);
3604df
+
3604df
+        if (event_gen1 != event_gen2)
3604df
+                need_refresh = _gf_true;
3604df
+out:
3604df
+        return need_refresh;
3604df
+}
3604df
+
3604df
+/* Mark inode as needing afr_inode_refresh; returns the ctx lookup result. */
3604df
+static int
3604df
+afr_inode_need_refresh_set (inode_t *inode, xlator_t *this)
3604df
+{
3604df
+        int               ret         = -1;
3604df
+        afr_inode_ctx_t  *ctx         = NULL;
3604df
+
3604df
+        GF_VALIDATE_OR_GOTO (this->name, inode, out);
3604df
+
3604df
+        LOCK(&inode->lock);
3604df
+        {
3604df
+                ret = __afr_inode_ctx_get (this, inode, &ctx);
3604df
+                if (ret)
3604df
+                        goto unlock;
3604df
+
3604df
+                ctx->need_refresh = _gf_true;
3604df
+        }
3604df
+unlock:
3604df
+        UNLOCK(&inode->lock);
3604df
+out:
3604df
+        return ret;
3604df
+}
3604df
+
3604df
 int
3604df
 afr_inode_read_subvol_reset (inode_t *inode, xlator_t *this)
3604df
 {
3604df
@@ -2701,7 +2757,8 @@ afr_discover (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req
3604df
 	afr_read_subvol_get (loc->inode, this, NULL, NULL, &event,
3604df
 			     AFR_DATA_TRANSACTION, NULL);
3604df
 
3604df
-	if (event != local->event_generation)
3604df
+	if (afr_is_inode_refresh_reqd (loc->inode, this, event,
3604df
+                                       local->event_generation))
3604df
 		afr_inode_refresh (frame, this, loc->inode, NULL,
3604df
                                    afr_discover_do);
3604df
 	else
3604df
@@ -2852,7 +2909,8 @@ afr_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req)
3604df
 	afr_read_subvol_get (loc->parent, this, NULL, NULL, &event,
3604df
 			     AFR_DATA_TRANSACTION, NULL);
3604df
 
3604df
-	if (event != local->event_generation)
3604df
+	if (afr_is_inode_refresh_reqd (loc->inode, this, event,
3604df
+                                       local->event_generation))
3604df
 		afr_inode_refresh (frame, this, loc->parent, NULL,
3604df
                                    afr_lookup_do);
3604df
 	else
3604df
@@ -4627,22 +4685,22 @@ afr_notify (xlator_t *this, int32_t event,
3604df
                          * pronounced. Hence when a pending xattr is set notify
3604df
                          * all the md-cache clients to invalidate the existing
3604df
                          * stat cache and send the lookup next time */
3604df
-                        if (up_ci->dict) {
3604df
-                                for (i = 0; i < priv->child_count; i++) {
3604df
-                                        if (dict_get (up_ci->dict, priv->pending_key[i])) {
3604df
-                                                 ret = dict_set_int8 (up_ci->dict,
3604df
-                                                                      MDC_INVALIDATE_IATT , 0);
3604df
-                                                 break;
3604df
-                                        }
3604df
+                        if (!up_ci->dict)
3604df
+                                break;
3604df
+                        for (i = 0; i < priv->child_count; i++) {
3604df
+                                if (dict_get (up_ci->dict, priv->pending_key[i])) {
3604df
+                                         ret = dict_set_int8 (up_ci->dict,
3604df
+                                                              MDC_INVALIDATE_IATT, 0);
3604df
+                                         itable = ((xlator_t *)this->graph->top)->itable;
3604df
+                                         /*Internal processes may not have itable for top xlator*/
3604df
+                                         if (itable)
3604df
+                                                 inode = inode_find (itable, up_data->gfid);
3604df
+                                         if (inode)
3604df
+                                                 afr_inode_need_refresh_set (inode, this);
3604df
+
3604df
+                                         break;
3604df
                                 }
3604df
                         }
3604df
-                        itable = ((xlator_t *)this->graph->top)->itable;
3604df
-                       /*Internal processes may not have itable for top xlator*/
3604df
-                        if (itable)
3604df
-                                inode = inode_find (itable, up_data->gfid);
3604df
-                        if (inode)
3604df
-                                afr_inode_read_subvol_reset (inode, this);
3604df
-
3604df
                         break;
3604df
                 default:
3604df
                         propagate = 1;
3604df
diff --git a/xlators/cluster/afr/src/afr-read-txn.c b/xlators/cluster/afr/src/afr-read-txn.c
3604df
index 1c7b4f9..fa98d37 100644
3604df
--- a/xlators/cluster/afr/src/afr-read-txn.c
3604df
+++ b/xlators/cluster/afr/src/afr-read-txn.c
3604df
@@ -221,7 +221,8 @@ afr_read_txn (call_frame_t *frame, xlator_t *this, inode_t *inode,
3604df
         gf_msg_debug (this->name, 0, "%s: generation now vs cached: %d, "
3604df
                       "%d", uuid_utoa (inode->gfid), local->event_generation,
3604df
                       event_generation);
3604df
-	if (local->event_generation != event_generation)
3604df
+	if (afr_is_inode_refresh_reqd (inode, this, local->event_generation,
3604df
+                                       event_generation))
3604df
 		/* servers have disconnected / reconnected, and possibly
3604df
 		   rebooted, very likely changing the state of freshness
3604df
 		   of copies */
3604df
diff --git a/xlators/cluster/afr/src/afr-transaction.c b/xlators/cluster/afr/src/afr-transaction.c
3604df
index 02a968e..62b680c 100644
3604df
--- a/xlators/cluster/afr/src/afr-transaction.c
3604df
+++ b/xlators/cluster/afr/src/afr-transaction.c
3604df
@@ -2575,7 +2575,9 @@ afr_transaction (call_frame_t *frame, xlator_t *this, afr_transaction_type type)
3604df
 
3604df
         ret = afr_inode_get_readable (frame, local->inode, this,
3604df
                                       local->readable, &event_generation, type);
3604df
-        if (ret < 0 || event_generation != priv->event_generation) {
3604df
+        if (ret < 0 || afr_is_inode_refresh_reqd (local->inode, this,
3604df
+                                                  priv->event_generation,
3604df
+                                                  event_generation)) {
3604df
                 afr_inode_refresh (frame, this, local->inode, local->loc.gfid,
3604df
                                    afr_write_txn_refresh_done);
3604df
         } else {
3604df
diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h
3604df
index a9cab13..c84c3af 100644
3604df
--- a/xlators/cluster/afr/src/afr.h
3604df
+++ b/xlators/cluster/afr/src/afr.h
3604df
@@ -813,6 +813,7 @@ typedef struct _afr_inode_ctx {
3604df
         uint64_t        read_subvol;
3604df
         int             spb_choice;
3604df
         gf_timer_t      *timer;
3604df
+        gf_boolean_t    need_refresh;
3604df
 } afr_inode_ctx_t;
3604df
 
3604df
 typedef struct afr_spbc_timeout {
3604df
@@ -1221,4 +1222,7 @@ afr_compound_cleanup (compound_args_t *args, dict_t *xdata,
3604df
 int32_t
3604df
 afr_quorum_errno (afr_private_t *priv);
3604df
 
3604df
+gf_boolean_t
3604df
+afr_is_inode_refresh_reqd (inode_t *inode, xlator_t *this,
3604df
+                           int event_gen1, int event_gen2);
3604df
 #endif /* __AFR_H__ */
3604df
-- 
3604df
2.9.3
3604df