887953
From e3f6fc1ccff95145b94aea9405cd136ada9000bc Mon Sep 17 00:00:00 2001
887953
From: Ravishankar N <ravishankar@redhat.com>
887953
Date: Wed, 16 Aug 2017 18:01:17 +0530
887953
Subject: [PATCH 476/493] afr: add checks for allowing lookups
887953
887953
Patch in upstream master: https://review.gluster.org/#/c/glusterfs/+/17673/
887953
887953
Problem:
887953
In an arbiter volume, lookup was being served from one of the sink
887953
bricks (source brick was down). shard uses the iatt values from lookup cbk
887953
to calculate the size and block count, which in this case were incorrect
887953
values. shard_local_t->last_block was thus initialised to -1, resulting
887953
in an infinite while loop in shard_common_resolve_shards().
887953
887953
Fix:
887953
Use client quorum logic to allow or fail the lookups from afr if there
887953
are no readable subvolumes. So in replica-3 or arbiter vols, if there is
887953
no good copy or if quorum is not met, fail lookup with ENOTCONN.
887953
887953
With this fix, we are also removing support for quorum-reads xlator
887953
option. So if quorum is not met, neither read nor write txns are allowed
887953
and we fail the fop with ENOTCONN.
887953
887953
Change-Id: Ic65c00c24f77ece007328b421494eee62a505fa0
887953
BUG: 1362129
887953
Signed-off-by: Ravishankar N <ravishankar@redhat.com>
887953
Reviewed-on: https://code.engineering.redhat.com/gerrit/158650
887953
Tested-by: RHGS Build Bot <nigelb@redhat.com>
887953
Reviewed-by: Karthik Subrahmanya <ksubrahm@redhat.com>
887953
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
887953
---
887953
 tests/basic/afr/quorum.t               |  23 ----
887953
 tests/bugs/replicate/bug-977797.t      |   2 +
887953
 xlators/cluster/afr/src/afr-common.c   | 244 +++++++++++++++++++++------------
887953
 xlators/cluster/afr/src/afr-read-txn.c |   3 +-
887953
 xlators/cluster/afr/src/afr.c          |   7 +-
887953
 xlators/cluster/afr/src/afr.h          |   1 -
887953
 6 files changed, 164 insertions(+), 116 deletions(-)
887953
887953
diff --git a/tests/basic/afr/quorum.t b/tests/basic/afr/quorum.t
887953
index 252e254..58116ba 100644
887953
--- a/tests/basic/afr/quorum.t
887953
+++ b/tests/basic/afr/quorum.t
887953
@@ -31,11 +31,7 @@ TEST $CLI volume set $V0 cluster.quorum-count 2
887953
 TEST test_write
887953
 TEST kill_brick $V0 $H0 $B0/${V0}1
887953
 TEST ! test_write
887953
-EXPECT "abc" cat $M0/b
887953
-TEST $CLI volume set $V0 cluster.quorum-reads on
887953
-EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "1" mount_get_option_value $M0 $V0-replicate-0 quorum-reads
887953
 TEST ! cat $M0/b
887953
-TEST $CLI volume reset $V0 cluster.quorum-reads
887953
 
887953
 TEST $CLI volume set $V0 cluster.quorum-type auto
887953
 EXPECT auto volume_option $V0 cluster.quorum-type
887953
@@ -44,11 +40,7 @@ EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
887953
 TEST test_write
887953
 TEST kill_brick $V0 $H0 $B0/${V0}1
887953
 TEST ! test_write
887953
-EXPECT "abc" cat $M0/b
887953
-TEST $CLI volume set $V0 cluster.quorum-reads on
887953
-EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "1" mount_get_option_value $M0 $V0-replicate-0 quorum-reads
887953
 TEST ! cat $M0/b
887953
-TEST $CLI volume reset $V0 cluster.quorum-reads
887953
 
887953
 TEST $CLI volume set $V0 cluster.quorum-type none
887953
 EXPECT none volume_option $V0 cluster.quorum-type
887953
@@ -57,11 +49,6 @@ TEST test_write
887953
 TEST $CLI volume reset $V0 cluster.quorum-type
887953
 TEST test_write
887953
 EXPECT "abc" cat $M0/b
887953
-TEST $CLI volume set $V0 cluster.quorum-reads on
887953
-EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "1" mount_get_option_value $M0 $V0-replicate-0 quorum-reads
887953
-EXPECT "abc" cat $M0/b
887953
-TEST $CLI volume reset $V0 cluster.quorum-reads
887953
-
887953
 
887953
 cleanup;
887953
 TEST glusterd;
887953
@@ -86,24 +73,14 @@ TEST $CLI volume set $V0 cluster.quorum-count 3
887953
 TEST test_write
887953
 TEST kill_brick $V0 $H0 $B0/${V0}1
887953
 TEST ! test_write
887953
-EXPECT "abc" cat $M0/b
887953
-TEST $CLI volume set $V0 cluster.quorum-reads on
887953
-EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "1" mount_get_option_value $M0 $V0-replicate-0 quorum-reads
887953
 TEST ! cat $M0/b
887953
-TEST $CLI volume reset $V0 cluster.quorum-reads
887953
-
887953
 
887953
 TEST $CLI volume set $V0 cluster.quorum-type auto
887953
 EXPECT auto volume_option $V0 cluster.quorum-type
887953
 TEST test_write
887953
 TEST kill_brick $V0 $H0 $B0/${V0}3
887953
 TEST ! test_write
887953
-EXPECT "abc" cat $M0/b
887953
-TEST $CLI volume set $V0 cluster.quorum-reads on
887953
-EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "1" mount_get_option_value $M0 $V0-replicate-0 quorum-reads
887953
 TEST ! cat $M0/b
887953
-TEST $CLI volume reset $V0 cluster.quorum-reads
887953
-
887953
 
887953
 TEST $CLI volume set $V0 cluster.quorum-type none
887953
 EXPECT none volume_option $V0 cluster.quorum-type
887953
diff --git a/tests/bugs/replicate/bug-977797.t b/tests/bugs/replicate/bug-977797.t
887953
index ea9a98a..fee8205 100755
887953
--- a/tests/bugs/replicate/bug-977797.t
887953
+++ b/tests/bugs/replicate/bug-977797.t
887953
@@ -53,6 +53,8 @@ TEST chmod 757 $M0/a/file
887953
 TEST $CLI volume start $V0 force
887953
 EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1;
887953
 
887953
+#Trigger entry heal of $M0/a
887953
+getfattr -n user.nosuchattr $M0/a
887953
 dd if=$M0/a/file of=/dev/null bs=1024k
887953
 #read fails, but heal is triggered.
887953
 TEST [ $? -ne 0 ]
887953
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
887953
index 10d9620..231de9d 100644
887953
--- a/xlators/cluster/afr/src/afr-common.c
887953
+++ b/xlators/cluster/afr/src/afr-common.c
887953
@@ -47,9 +47,7 @@
887953
 int32_t
887953
 afr_quorum_errno (afr_private_t *priv)
887953
 {
887953
-        if (priv->quorum_reads)
887953
-                return ENOTCONN;
887953
-        return EROFS;
887953
+        return ENOTCONN;
887953
 }
887953
 
887953
 int
887953
@@ -1154,8 +1152,6 @@ afr_replies_interpret (call_frame_t *frame, xlator_t *this, inode_t *inode,
887953
 	return ret;
887953
 }
887953
 
887953
-
887953
-
887953
 int
887953
 afr_refresh_selfheal_done (int ret, call_frame_t *heal, void *opaque)
887953
 {
887953
@@ -1726,6 +1722,29 @@ afr_inode_read_subvol_type_get (inode_t *inode, xlator_t *this,
887953
 	return ret;
887953
 }
887953
 
887953
+void
887953
+afr_readables_intersect_get (inode_t *inode, xlator_t *this, int *event,
887953
+                             unsigned char *intersection)
887953
+{
887953
+	afr_private_t *priv = NULL;
887953
+	unsigned char *data_readable = NULL;
887953
+	unsigned char *metadata_readable = NULL;
887953
+	unsigned char *intersect = NULL;
887953
+
887953
+	priv = this->private;
887953
+	data_readable = alloca0 (priv->child_count);
887953
+	metadata_readable = alloca0 (priv->child_count);
887953
+	intersect = alloca0 (priv->child_count);
887953
+
887953
+	afr_inode_read_subvol_get (inode, this, data_readable,
887953
+                               metadata_readable, event);
887953
+
887953
+	AFR_INTERSECT (intersect, data_readable, metadata_readable,
887953
+		       priv->child_count);
887953
+        if (intersection)
887953
+                memcpy (intersection, intersect,
887953
+                        sizeof (*intersection) * priv->child_count);
887953
+}
887953
 
887953
 int
887953
 afr_read_subvol_get (inode_t *inode, xlator_t *this, int *subvol_p,
887953
@@ -1734,8 +1753,6 @@ afr_read_subvol_get (inode_t *inode, xlator_t *this, int *subvol_p,
887953
                      afr_read_subvol_args_t *args)
887953
 {
887953
 	afr_private_t *priv = NULL;
887953
-	unsigned char *data_readable = NULL;
887953
-	unsigned char *metadata_readable = NULL;
887953
 	unsigned char *readable = NULL;
887953
 	unsigned char *intersection = NULL;
887953
 	int subvol = -1;
887953
@@ -1744,17 +1761,11 @@ afr_read_subvol_get (inode_t *inode, xlator_t *this, int *subvol_p,
887953
 	priv = this->private;
887953
 
887953
 	readable = alloca0 (priv->child_count);
887953
-	data_readable = alloca0 (priv->child_count);
887953
-	metadata_readable = alloca0 (priv->child_count);
887953
 	intersection = alloca0 (priv->child_count);
887953
 
887953
 	afr_inode_read_subvol_type_get (inode, this, readable, &event, type);
887953
 
887953
-	afr_inode_read_subvol_get (inode, this, data_readable, metadata_readable,
887953
-				   &event);
887953
-
887953
-	AFR_INTERSECT (intersection, data_readable, metadata_readable,
887953
-		       priv->child_count);
887953
+        afr_readables_intersect_get (inode, this, &event, intersection);
887953
 
887953
 	if (AFR_COUNT (intersection, priv->child_count) > 0)
887953
 		subvol = afr_read_subvol_select_by_policy (inode, this,
887953
@@ -2188,18 +2199,28 @@ afr_get_parent_read_subvol (xlator_t *this, inode_t *parent,
887953
 
887953
 int
887953
 afr_read_subvol_decide (inode_t *inode, xlator_t *this,
887953
-                        afr_read_subvol_args_t *args)
887953
+                        afr_read_subvol_args_t *args, unsigned char *readable)
887953
 {
887953
-        int data_subvol  = -1;
887953
-        int mdata_subvol = -1;
887953
+        int event = 0;
887953
+        afr_private_t *priv = NULL;
887953
+        unsigned char *intersection = NULL;
887953
+
887953
+        priv = this->private;
887953
+        intersection = alloca0 (priv->child_count);
887953
+
887953
+        afr_readables_intersect_get (inode, this, &event, intersection);
887953
 
887953
-        data_subvol = afr_data_subvol_get (inode, this, NULL, NULL, NULL, args);
887953
-        mdata_subvol = afr_metadata_subvol_get (inode, this,
887953
-                                                NULL, NULL, NULL, args);
887953
-        if (data_subvol == -1 || mdata_subvol == -1)
887953
+        if (AFR_COUNT (intersection, priv->child_count) <= 0) {
887953
+                /* TODO: If we have one brick with valid data_readable and
887953
+                 * another with metadata_readable, try to send an iatt with
887953
+                 * valid bits from both.*/
887953
                 return -1;
887953
+        }
887953
 
887953
-        return data_subvol;
887953
+        memcpy (readable, intersection, sizeof (*readable) * priv->child_count);
887953
+
887953
+        return afr_read_subvol_select_by_policy (inode, this, intersection,
887953
+                                                 args);
887953
 }
887953
 
887953
 static inline int
887953
@@ -2216,7 +2237,49 @@ afr_first_up_child (call_frame_t *frame, xlator_t *this)
887953
                 if (local->replies[i].valid &&
887953
                     local->replies[i].op_ret == 0)
887953
                         return i;
887953
-        return 0;
887953
+        return -1;
887953
+}
887953
+
887953
+static void
887953
+afr_attempt_readsubvol_set (call_frame_t *frame, xlator_t *this,
887953
+                            unsigned char *success_replies,
887953
+                            unsigned char *data_readable, int *read_subvol)
887953
+{
887953
+        afr_private_t *priv = NULL;
887953
+        afr_local_t *local = NULL;
887953
+        int spb_choice = -1;
887953
+        int child_count = -1;
887953
+
887953
+        if (*read_subvol != -1)
887953
+                return;
887953
+
887953
+        priv = this->private;
887953
+        local = frame->local;
887953
+        child_count = priv->child_count;
887953
+
887953
+        afr_inode_split_brain_choice_get (local->inode, this,
887953
+                                          &spb_choice);
887953
+        if ((spb_choice >= 0) &&
887953
+            (AFR_COUNT(success_replies, child_count) == child_count)) {
887953
+                *read_subvol = spb_choice;
887953
+        } else if (!priv->quorum_count) {
887953
+                *read_subvol = afr_first_up_child (frame, this);
887953
+        } else if (priv->quorum_count &&
887953
+                   afr_has_quorum (data_readable, this)) {
887953
+                /* read_subvol is guaranteed to be valid if we hit this path. */
887953
+                *read_subvol = afr_first_up_child (frame, this);
887953
+        } else {
887953
+               /* If quorum is enabled and we do not have a
887953
+                  readable yet, it means all good copies are down.
887953
+               */
887953
+               local->op_ret = -1;
887953
+               local->op_errno = ENOTCONN;
887953
+               gf_msg (this->name, GF_LOG_WARNING, 0,
887953
+                       AFR_MSG_READ_SUBVOL_ERROR, "no read "
887953
+                       "subvols for %s", local->loc.path);
887953
+        }
887953
+        if (*read_subvol >= 0)
887953
+                dict_del (local->replies[*read_subvol].xdata, GF_CONTENT_KEY);
887953
 }
887953
 
887953
 static void
887953
@@ -2230,13 +2293,13 @@ afr_lookup_done (call_frame_t *frame, xlator_t *this)
887953
         int                 par_read_subvol = 0;
887953
         int                 ret         = -1;
887953
 	unsigned char      *readable = NULL;
887953
+        unsigned char      *success_replies = NULL;
887953
 	int                 event = 0;
887953
 	struct afr_reply   *replies = NULL;
887953
 	uuid_t              read_gfid = {0, };
887953
 	gf_boolean_t        locked_entry = _gf_false;
887953
 	gf_boolean_t        can_interpret = _gf_true;
887953
         inode_t            *parent = NULL;
887953
-        int                 spb_choice = -1;
887953
         ia_type_t           ia_type = IA_INVAL;
887953
         afr_read_subvol_args_t args = {0,};
887953
         char               *gfid_heal_msg = NULL;
887953
@@ -2250,11 +2313,12 @@ afr_lookup_done (call_frame_t *frame, xlator_t *this)
887953
                                                   this);
887953
 
887953
 	readable = alloca0 (priv->child_count);
887953
+	success_replies = alloca0 (priv->child_count);
887953
 
887953
 	afr_inode_read_subvol_get (parent, this, readable, NULL, &event);
887953
+        par_read_subvol = afr_get_parent_read_subvol (this, parent, replies,
887953
+                                                      readable);
887953
 
887953
-        afr_inode_split_brain_choice_get (local->inode, this,
887953
-                                                &spb_choice);
887953
 	/* First, check if we have a gfid-change from somewhere,
887953
 	   If so, propagate that so that a fresh lookup can be
887953
 	   issued
887953
@@ -2262,13 +2326,17 @@ afr_lookup_done (call_frame_t *frame, xlator_t *this)
887953
         if (local->cont.lookup.needs_fresh_lookup) {
887953
                 local->op_ret = -1;
887953
                 local->op_errno = ESTALE;
887953
-                goto unwind;
887953
+                goto error;
887953
         }
887953
 
887953
 	op_errno = afr_final_errno (frame->local, this->private);
887953
 	local->op_errno = op_errno;
887953
 
887953
 	read_subvol = -1;
887953
+        for (i = 0; i < priv->child_count; i++)
887953
+                if (replies[i].valid && replies[i].op_ret == 0)
887953
+                        success_replies[i] = 1;
887953
+
887953
 	for (i = 0; i < priv->child_count; i++) {
887953
 		if (!replies[i].valid)
887953
 			continue;
887953
@@ -2277,9 +2345,9 @@ afr_lookup_done (call_frame_t *frame, xlator_t *this)
887953
 		    replies[i].op_errno == ENOENT) {
887953
 			/* Second, check entry is still
887953
 			   "underway" in creation */
887953
-			local->op_ret = -1;
887953
-			local->op_errno = ENOENT;
887953
-			goto unwind;
887953
+                        local->op_ret = -1;
887953
+                        local->op_errno = ENOENT;
887953
+                        goto error;
887953
 		}
887953
 
887953
 		if (replies[i].op_ret == -1)
887953
@@ -2293,8 +2361,8 @@ afr_lookup_done (call_frame_t *frame, xlator_t *this)
887953
 		}
887953
 	}
887953
 
887953
-	if (read_subvol == -1)
887953
-		goto unwind;
887953
+        if (read_subvol == -1)
887953
+                goto error;
887953
 	/* We now have a read_subvol, which is readable[] (if there
887953
 	   were any). Next we look for GFID mismatches. We don't
887953
 	   consider a GFID mismatch as an error if read_subvol is
887953
@@ -2318,58 +2386,61 @@ afr_lookup_done (call_frame_t *frame, xlator_t *this)
887953
 		if (readable[read_subvol] && !readable[i])
887953
 			continue;
887953
 
887953
+                /* If we were called from glfsheal and there is still a gfid
887953
+                 * mismatch, succeed the lookup and let glfsheal print the
887953
+                 * response via gfid-heal-msg.*/
887953
+                if (!dict_get_str (local->xattr_req, "gfid-heal-msg",
887953
+                                   &gfid_heal_msg))
887953
+                        goto cant_interpret;
887953
+
887953
 		/* LOG ERROR */
887953
 		local->op_ret = -1;
887953
 		local->op_errno = EIO;
887953
-		goto unwind;
887953
+		goto error;
887953
 	}
887953
 
887953
 	/* Forth, for the finalized GFID, pick the best subvolume
887953
 	   to return stats from.
887953
 	*/
887953
+        read_subvol = -1;
887953
+        memset (readable, 0, sizeof (*readable) * priv->child_count);
887953
 	if (can_interpret) {
887953
+                if (!afr_has_quorum (success_replies, this))
887953
+                        goto cant_interpret;
887953
 		/* It is safe to call afr_replies_interpret() because we have
887953
 		   a response from all the UP subvolumes and all of them resolved
887953
 		   to the same GFID
887953
 		*/
887953
                 gf_uuid_copy (args.gfid, read_gfid);
887953
                 args.ia_type = ia_type;
887953
-		if (afr_replies_interpret (frame, this, local->inode, NULL)) {
887953
-                        read_subvol = afr_read_subvol_decide (local->inode,
887953
-                                                              this, &args);
887953
+		ret = afr_replies_interpret (frame, this, local->inode, NULL);
887953
+                read_subvol = afr_read_subvol_decide (local->inode, this, &args,
887953
+                                                      readable);
887953
+                if (read_subvol == -1)
887953
+                        goto cant_interpret;
887953
+                if (ret) {
887953
 			afr_inode_event_gen_reset (local->inode, this);
887953
-			goto cant_interpret;
887953
-		} else {
887953
-                        read_subvol = afr_data_subvol_get (local->inode, this,
887953
-                                                       NULL, NULL, NULL, &args);
887953
-		}
887953
+                        dict_del (local->replies[read_subvol].xdata,
887953
+                                  GF_CONTENT_KEY);
887953
+                }
887953
 	} else {
887953
 	cant_interpret:
887953
+                afr_attempt_readsubvol_set (frame, this, success_replies,
887953
+                                            readable, &read_subvol);
887953
                 if (read_subvol == -1) {
887953
-                        if (spb_choice >= 0)
887953
-                                read_subvol = spb_choice;
887953
-                        else
887953
-                                read_subvol = afr_first_up_child (frame, this);
887953
+                        goto error;
887953
                 }
887953
-		dict_del (replies[read_subvol].xdata, GF_CONTENT_KEY);
887953
 	}
887953
 
887953
 	afr_handle_quota_size (frame, this);
887953
 
887953
-unwind:
887953
         afr_set_need_heal (this, local);
887953
-        if (read_subvol == -1) {
887953
-                if (spb_choice >= 0)
887953
-                        read_subvol = spb_choice;
887953
-                else
887953
-                        read_subvol = afr_first_up_child (frame, this);
887953
-
887953
-        }
887953
-        par_read_subvol = afr_get_parent_read_subvol (this, parent, replies,
887953
-                                                      readable);
887953
         if (AFR_IS_ARBITER_BRICK (priv, read_subvol) && local->op_ret == 0) {
887953
-                        local->op_ret = -1;
887953
-                        local->op_errno = ENOTCONN;
887953
+                local->op_ret = -1;
887953
+                local->op_errno = ENOTCONN;
887953
+                gf_msg_debug(this->name, 0, "Arbiter cannot be a read subvol "
887953
+                             "for %s", local->loc.path);
887953
+                goto error;
887953
         }
887953
 
887953
         ret = dict_get_str (local->xattr_req, "gfid-heal-msg", &gfid_heal_msg);
887953
@@ -2389,6 +2460,11 @@ unwind:
887953
 			  local->inode, &local->replies[read_subvol].poststat,
887953
 			  local->replies[read_subvol].xdata,
887953
 			  &local->replies[par_read_subvol].postparent);
887953
+        return;
887953
+
887953
+error:
887953
+        AFR_STACK_UNWIND (lookup, frame, local->op_ret, local->op_errno, NULL,
887953
+                          NULL, NULL, NULL);
887953
 }
887953
 
887953
 /*
887953
@@ -2904,55 +2980,54 @@ afr_discover_done (call_frame_t *frame, xlator_t *this)
887953
         afr_local_t         *local = NULL;
887953
 	int                 i = -1;
887953
 	int                 op_errno = 0;
887953
-	int                 spb_choice = -1;
887953
 	int                 read_subvol = -1;
887953
+        unsigned char *data_readable = NULL;
887953
+        unsigned char *success_replies = NULL;
887953
 
887953
         priv  = this->private;
887953
         local = frame->local;
887953
-
887953
-        afr_inode_split_brain_choice_get (local->inode, this,
887953
-                                          &spb_choice);
887953
+        data_readable = alloca0 (priv->child_count);
887953
+        success_replies = alloca0 (priv->child_count);
887953
 
887953
 	for (i = 0; i < priv->child_count; i++) {
887953
 		if (!local->replies[i].valid)
887953
 			continue;
887953
-		if (local->replies[i].op_ret == 0)
887953
+		if (local->replies[i].op_ret == 0) {
887953
+                        success_replies[i] = 1;
887953
 			local->op_ret = 0;
887953
+                }
887953
 	}
887953
 
887953
 	op_errno = afr_final_errno (frame->local, this->private);
887953
 
887953
         if (local->op_ret < 0) {
887953
-		local->op_errno = op_errno;
887953
-		local->op_ret = -1;
887953
-                goto unwind;
887953
+                AFR_STACK_UNWIND (lookup, frame, -1, op_errno, NULL, NULL,
887953
+                                  NULL, NULL);
887953
+                return;
887953
 	}
887953
 
887953
-	afr_replies_interpret (frame, this, local->inode, NULL);
887953
+        if (!afr_has_quorum (success_replies, this))
887953
+                goto unwind;
887953
 
887953
-	read_subvol = afr_read_subvol_decide (local->inode, this, NULL);
887953
-	if (read_subvol == -1) {
887953
-	        gf_msg (this->name, GF_LOG_WARNING, 0,
887953
-                        AFR_MSG_READ_SUBVOL_ERROR, "no read subvols for %s",
887953
-			local->loc.path);
887953
+	afr_replies_interpret (frame, this, local->inode, NULL);
887953
 
887953
-                if (spb_choice >= 0) {
887953
-                        read_subvol = spb_choice;
887953
-                } else {
887953
-                        read_subvol = afr_first_up_child (frame, this);
887953
-                }
887953
-	}
887953
+	read_subvol = afr_read_subvol_decide (local->inode, this, NULL,
887953
+                                          data_readable);
887953
 
887953
 unwind:
887953
+        afr_attempt_readsubvol_set (frame, this, success_replies, data_readable,
887953
+                                    &read_subvol);
887953
 	if (read_subvol == -1) {
887953
-                if (spb_choice >= 0)
887953
-                        read_subvol = spb_choice;
887953
-                else
887953
-                        read_subvol = afr_first_up_child (frame, this);
887953
+                AFR_STACK_UNWIND (lookup, frame, local->op_ret, local->op_errno,
887953
+                                  NULL, NULL, NULL, NULL);
887953
+                return;
887953
         }
887953
+
887953
         if (AFR_IS_ARBITER_BRICK (priv, read_subvol) && local->op_ret == 0) {
887953
-                        local->op_ret = -1;
887953
-                        local->op_errno = ENOTCONN;
887953
+                local->op_ret = -1;
887953
+                local->op_errno = ENOTCONN;
887953
+	        gf_msg_debug (this->name, 0, "Arbiter cannot be a read subvol "
887953
+                              "for %s", local->loc.path);
887953
         }
887953
 
887953
 	AFR_STACK_UNWIND (lookup, frame, local->op_ret, local->op_errno,
887953
@@ -4646,7 +4721,6 @@ afr_priv_dump (xlator_t *this)
887953
         gf_proc_dump_write("read_child", "%d", priv->read_child);
887953
         gf_proc_dump_write("favorite_child", "%d", priv->favorite_child);
887953
         gf_proc_dump_write("wait_count", "%u", priv->wait_count);
887953
-        gf_proc_dump_write("quorum-reads", "%d", priv->quorum_reads);
887953
         gf_proc_dump_write("heal-wait-queue-length", "%d",
887953
                            priv->heal_wait_qlen);
887953
         gf_proc_dump_write("heal-waiters", "%d", priv->heal_waiters);
887953
diff --git a/xlators/cluster/afr/src/afr-read-txn.c b/xlators/cluster/afr/src/afr-read-txn.c
887953
index 50e8040..f6c491b 100644
887953
--- a/xlators/cluster/afr/src/afr-read-txn.c
887953
+++ b/xlators/cluster/afr/src/afr-read-txn.c
887953
@@ -193,8 +193,7 @@ afr_read_txn (call_frame_t *frame, xlator_t *this, inode_t *inode,
887953
 	local->inode = inode_ref (inode);
887953
         local->is_read_txn = _gf_true;
887953
 
887953
-        if (priv->quorum_reads &&
887953
-            priv->quorum_count && !afr_has_quorum (priv->child_up, this)) {
887953
+        if (priv->quorum_count && !afr_has_quorum (local->child_up, this)) {
887953
                 local->op_ret = -1;
887953
                 local->op_errno = ENOTCONN;
887953
                 read_subvol = -1;
887953
diff --git a/xlators/cluster/afr/src/afr.c b/xlators/cluster/afr/src/afr.c
887953
index 0122b7f..1b738c0 100644
887953
--- a/xlators/cluster/afr/src/afr.c
887953
+++ b/xlators/cluster/afr/src/afr.c
887953
@@ -267,8 +267,6 @@ reconfigure (xlator_t *this, dict_t *options)
887953
         GF_OPTION_RECONF ("heal-timeout", priv->shd.timeout, options,
887953
                           int32, out);
887953
 
887953
-        GF_OPTION_RECONF ("quorum-reads", priv->quorum_reads, options,
887953
-                          bool, out);
887953
         GF_OPTION_RECONF ("consistent-metadata", priv->consistent_metadata,
887953
                           options, bool, out);
887953
 
887953
@@ -531,7 +529,6 @@ init (xlator_t *this)
887953
 	GF_OPTION_INIT ("iam-self-heal-daemon", priv->shd.iamshd, bool, out);
887953
         GF_OPTION_INIT ("heal-timeout", priv->shd.timeout, int32, out);
887953
 
887953
-        GF_OPTION_INIT ("quorum-reads", priv->quorum_reads, bool, out);
887953
         GF_OPTION_INIT ("consistent-metadata", priv->consistent_metadata, bool,
887953
                         out);
887953
         GF_OPTION_INIT ("consistent-io", priv->consistent_io, bool, out);
887953
@@ -965,8 +962,8 @@ struct volume_options options[] = {
887953
         { .key = {"quorum-reads"},
887953
           .type = GF_OPTION_TYPE_BOOL,
887953
           .default_value = "no",
887953
-          .description = "If quorum-reads is \"true\" only allow reads if "
887953
-                         "quorum is met when quorum is enabled.",
887953
+          .description = "This option has been removed. Reads are not allowed "
887953
+                          "if quorum is not met.",
887953
         },
887953
         { .key  = {"node-uuid"},
887953
           .type = GF_OPTION_TYPE_STR,
887953
diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h
887953
index af9dbc8..7010e9b 100644
887953
--- a/xlators/cluster/afr/src/afr.h
887953
+++ b/xlators/cluster/afr/src/afr.h
887953
@@ -131,7 +131,6 @@ typedef struct _afr_private {
887953
         gf_boolean_t      pre_op_compat;      /* on/off */
887953
 	uint32_t          post_op_delay_secs;
887953
         unsigned int      quorum_count;
887953
-        gf_boolean_t      quorum_reads;
887953
 
887953
         char                   vol_uuid[UUID_SIZE + 1];
887953
         int32_t                *last_event;
887953
-- 
887953
1.8.3.1
887953