Blob Blame History Raw
From 4a152008a4f17ff9373978535288f32963c6f87b Mon Sep 17 00:00:00 2001
From: Ravishankar N <ravishankar@redhat.com>
Date: Mon, 5 Jun 2017 09:40:51 +0530
Subject: [PATCH 485/486] afr: add errno to afr_inode_refresh_done()

Backport of https://review.gluster.org/17413 and
https://review.gluster.org/17436

Problem:
When parallel `rm -rf`s were being done from cifs clients, opendir might
fail on some replicas with ENOENT. DHT ignores partial opendir failures
in dht_fd_cbk() and winds readdirs on those replicas. Afr inode refresh
(as a part of readdirp read_txn) sees in its fd context that the state
of the fds is *not* AFR_FD_OPENED and bails out to
afr_inode_refresh_done() without doing a refresh. When this happens, the
errno is set as EIO due to lack of readable subvols, logging split-brain
messages in the logs.

Fix:
Introduce an errno argument to afr_inode_refresh_done() to bail out with
the right error value when inode refresh is not performed.

Change-Id: I015b38d80653961feadb0d07b038e08c19911c33
BUG: 1454689
Signed-off-by: Ravishankar N <ravishankar@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/108106
Reviewed-by: Karthik Subrahmanya <ksubrahm@redhat.com>
---
 xlators/cluster/afr/src/afr-common.c | 23 ++++++++++++++++-------
 1 file changed, 16 insertions(+), 7 deletions(-)

diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
index 316cc40..4b8334d 100644
--- a/xlators/cluster/afr/src/afr-common.c
+++ b/xlators/cluster/afr/src/afr-common.c
@@ -1142,7 +1142,7 @@ refresh_done:
 }
 
 int
-afr_inode_refresh_done (call_frame_t *frame, xlator_t *this)
+afr_inode_refresh_done (call_frame_t *frame, xlator_t *this, int error)
 {
 	call_frame_t *heal_frame = NULL;
 	afr_local_t *local = NULL;
@@ -1152,6 +1152,11 @@ afr_inode_refresh_done (call_frame_t *frame, xlator_t *this)
 	int ret = 0;
 	int err = 0;
 
+	if (error != 0) {
+		err = error;
+		goto refresh_done;
+	}
+
 	local = frame->local;
 
 	ret = afr_replies_interpret (frame, this, local->refreshinode,
@@ -1215,7 +1220,7 @@ afr_inode_refresh_subvol_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
         call_count = afr_frame_return (frame);
         if (call_count == 0) {
                 afr_set_need_heal (this, local);
-		afr_inode_refresh_done (frame, this);
+		afr_inode_refresh_done (frame, this, 0);
         }
 
 }
@@ -1306,20 +1311,21 @@ afr_inode_refresh_do (call_frame_t *frame, xlator_t *this)
         if (local->fd) {
                 fd_ctx = afr_fd_ctx_get (local->fd, this);
                 if (!fd_ctx) {
-                        afr_inode_refresh_done (frame, this);
+                        afr_inode_refresh_done (frame, this, EINVAL);
                         return 0;
                 }
         }
 
 	xdata = dict_new ();
 	if (!xdata) {
-		afr_inode_refresh_done (frame, this);
+		afr_inode_refresh_done (frame, this, ENOMEM);
 		return 0;
 	}
 
-	if (afr_xattr_req_prepare (this, xdata) != 0) {
+	ret = afr_xattr_req_prepare (this, xdata);
+	if (ret != 0) {
 		dict_unref (xdata);
-		afr_inode_refresh_done (frame, this);
+		afr_inode_refresh_done (frame, this, -ret);
 		return 0;
 	}
 
@@ -1352,7 +1358,10 @@ afr_inode_refresh_do (call_frame_t *frame, xlator_t *this)
 	call_count = local->call_count;
         if (!call_count) {
                 dict_unref (xdata);
-                afr_inode_refresh_done (frame, this);
+		if (local->fd && AFR_COUNT(local->child_up, priv->child_count))
+	                afr_inode_refresh_done (frame, this, EBADFD);
+		else
+	                afr_inode_refresh_done (frame, this, ENOTCONN);
                 return 0;
         }
 	for (i = 0; i < priv->child_count; i++) {
-- 
1.8.3.1