|
|
887953 |
From e3f6fc1ccff95145b94aea9405cd136ada9000bc Mon Sep 17 00:00:00 2001
|
|
|
887953 |
From: Ravishankar N <ravishankar@redhat.com>
|
|
|
887953 |
Date: Wed, 16 Aug 2017 18:01:17 +0530
|
|
|
887953 |
Subject: [PATCH 476/493] afr: add checks for allowing lookups
|
|
|
887953 |
|
|
|
887953 |
Patch in upstream master: https://review.gluster.org/#/c/glusterfs/+/17673/
|
|
|
887953 |
|
|
|
887953 |
Problem:
|
|
|
887953 |
In an arbiter volume, lookup was being served from one of the sink
|
|
|
887953 |
bricks (source brick was down). shard uses the iatt values from lookup cbk
|
|
|
887953 |
to calculate the size and block count, which in this case were incorrect
|
|
|
887953 |
values. shard_local_t->last_block was thus initialised to -1, resulting
|
|
|
887953 |
in an infinite while loop in shard_common_resolve_shards().
|
|
|
887953 |
|
|
|
887953 |
Fix:
|
|
|
887953 |
Use client quorum logic to allow or fail the lookups from afr if there
|
|
|
887953 |
are no readable subvolumes. So in replica-3 or arbiter vols, if there is
|
|
|
887953 |
no good copy or if quorum is not met, fail lookup with ENOTCONN.
|
|
|
887953 |
|
|
|
887953 |
With this fix, we are also removing support for quorum-reads xlator
|
|
|
887953 |
option. So if quorum is not met, neither read nor write txns are allowed
|
|
|
887953 |
and we fail the fop with ENOTCONN.
|
|
|
887953 |
|
|
|
887953 |
Change-Id: Ic65c00c24f77ece007328b421494eee62a505fa0
|
|
|
887953 |
BUG: 1362129
|
|
|
887953 |
Signed-off-by: Ravishankar N <ravishankar@redhat.com>
|
|
|
887953 |
Reviewed-on: https://code.engineering.redhat.com/gerrit/158650
|
|
|
887953 |
Tested-by: RHGS Build Bot <nigelb@redhat.com>
|
|
|
887953 |
Reviewed-by: Karthik Subrahmanya <ksubrahm@redhat.com>
|
|
|
887953 |
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
|
|
|
887953 |
---
|
|
|
887953 |
tests/basic/afr/quorum.t | 23 ----
|
|
|
887953 |
tests/bugs/replicate/bug-977797.t | 2 +
|
|
|
887953 |
xlators/cluster/afr/src/afr-common.c | 244 +++++++++++++++++++++------------
|
|
|
887953 |
xlators/cluster/afr/src/afr-read-txn.c | 3 +-
|
|
|
887953 |
xlators/cluster/afr/src/afr.c | 7 +-
|
|
|
887953 |
xlators/cluster/afr/src/afr.h | 1 -
|
|
|
887953 |
6 files changed, 164 insertions(+), 116 deletions(-)
|
|
|
887953 |
|
|
|
887953 |
diff --git a/tests/basic/afr/quorum.t b/tests/basic/afr/quorum.t
|
|
|
887953 |
index 252e254..58116ba 100644
|
|
|
887953 |
--- a/tests/basic/afr/quorum.t
|
|
|
887953 |
+++ b/tests/basic/afr/quorum.t
|
|
|
887953 |
@@ -31,11 +31,7 @@ TEST $CLI volume set $V0 cluster.quorum-count 2
|
|
|
887953 |
TEST test_write
|
|
|
887953 |
TEST kill_brick $V0 $H0 $B0/${V0}1
|
|
|
887953 |
TEST ! test_write
|
|
|
887953 |
-EXPECT "abc" cat $M0/b
|
|
|
887953 |
-TEST $CLI volume set $V0 cluster.quorum-reads on
|
|
|
887953 |
-EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "1" mount_get_option_value $M0 $V0-replicate-0 quorum-reads
|
|
|
887953 |
TEST ! cat $M0/b
|
|
|
887953 |
-TEST $CLI volume reset $V0 cluster.quorum-reads
|
|
|
887953 |
|
|
|
887953 |
TEST $CLI volume set $V0 cluster.quorum-type auto
|
|
|
887953 |
EXPECT auto volume_option $V0 cluster.quorum-type
|
|
|
887953 |
@@ -44,11 +40,7 @@ EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
|
|
|
887953 |
TEST test_write
|
|
|
887953 |
TEST kill_brick $V0 $H0 $B0/${V0}1
|
|
|
887953 |
TEST ! test_write
|
|
|
887953 |
-EXPECT "abc" cat $M0/b
|
|
|
887953 |
-TEST $CLI volume set $V0 cluster.quorum-reads on
|
|
|
887953 |
-EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "1" mount_get_option_value $M0 $V0-replicate-0 quorum-reads
|
|
|
887953 |
TEST ! cat $M0/b
|
|
|
887953 |
-TEST $CLI volume reset $V0 cluster.quorum-reads
|
|
|
887953 |
|
|
|
887953 |
TEST $CLI volume set $V0 cluster.quorum-type none
|
|
|
887953 |
EXPECT none volume_option $V0 cluster.quorum-type
|
|
|
887953 |
@@ -57,11 +49,6 @@ TEST test_write
|
|
|
887953 |
TEST $CLI volume reset $V0 cluster.quorum-type
|
|
|
887953 |
TEST test_write
|
|
|
887953 |
EXPECT "abc" cat $M0/b
|
|
|
887953 |
-TEST $CLI volume set $V0 cluster.quorum-reads on
|
|
|
887953 |
-EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "1" mount_get_option_value $M0 $V0-replicate-0 quorum-reads
|
|
|
887953 |
-EXPECT "abc" cat $M0/b
|
|
|
887953 |
-TEST $CLI volume reset $V0 cluster.quorum-reads
|
|
|
887953 |
-
|
|
|
887953 |
|
|
|
887953 |
cleanup;
|
|
|
887953 |
TEST glusterd;
|
|
|
887953 |
@@ -86,24 +73,14 @@ TEST $CLI volume set $V0 cluster.quorum-count 3
|
|
|
887953 |
TEST test_write
|
|
|
887953 |
TEST kill_brick $V0 $H0 $B0/${V0}1
|
|
|
887953 |
TEST ! test_write
|
|
|
887953 |
-EXPECT "abc" cat $M0/b
|
|
|
887953 |
-TEST $CLI volume set $V0 cluster.quorum-reads on
|
|
|
887953 |
-EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "1" mount_get_option_value $M0 $V0-replicate-0 quorum-reads
|
|
|
887953 |
TEST ! cat $M0/b
|
|
|
887953 |
-TEST $CLI volume reset $V0 cluster.quorum-reads
|
|
|
887953 |
-
|
|
|
887953 |
|
|
|
887953 |
TEST $CLI volume set $V0 cluster.quorum-type auto
|
|
|
887953 |
EXPECT auto volume_option $V0 cluster.quorum-type
|
|
|
887953 |
TEST test_write
|
|
|
887953 |
TEST kill_brick $V0 $H0 $B0/${V0}3
|
|
|
887953 |
TEST ! test_write
|
|
|
887953 |
-EXPECT "abc" cat $M0/b
|
|
|
887953 |
-TEST $CLI volume set $V0 cluster.quorum-reads on
|
|
|
887953 |
-EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "1" mount_get_option_value $M0 $V0-replicate-0 quorum-reads
|
|
|
887953 |
TEST ! cat $M0/b
|
|
|
887953 |
-TEST $CLI volume reset $V0 cluster.quorum-reads
|
|
|
887953 |
-
|
|
|
887953 |
|
|
|
887953 |
TEST $CLI volume set $V0 cluster.quorum-type none
|
|
|
887953 |
EXPECT none volume_option $V0 cluster.quorum-type
|
|
|
887953 |
diff --git a/tests/bugs/replicate/bug-977797.t b/tests/bugs/replicate/bug-977797.t
|
|
|
887953 |
index ea9a98a..fee8205 100755
|
|
|
887953 |
--- a/tests/bugs/replicate/bug-977797.t
|
|
|
887953 |
+++ b/tests/bugs/replicate/bug-977797.t
|
|
|
887953 |
@@ -53,6 +53,8 @@ TEST chmod 757 $M0/a/file
|
|
|
887953 |
TEST $CLI volume start $V0 force
|
|
|
887953 |
EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1;
|
|
|
887953 |
|
|
|
887953 |
+#Trigger entry heal of $M0/a
|
|
|
887953 |
+getfattr -n user.nosuchattr $M0/a
|
|
|
887953 |
dd if=$M0/a/file of=/dev/null bs=1024k
|
|
|
887953 |
#read fails, but heal is triggered.
|
|
|
887953 |
TEST [ $? -ne 0 ]
|
|
|
887953 |
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
|
|
|
887953 |
index 10d9620..231de9d 100644
|
|
|
887953 |
--- a/xlators/cluster/afr/src/afr-common.c
|
|
|
887953 |
+++ b/xlators/cluster/afr/src/afr-common.c
|
|
|
887953 |
@@ -47,9 +47,7 @@
|
|
|
887953 |
int32_t
|
|
|
887953 |
afr_quorum_errno (afr_private_t *priv)
|
|
|
887953 |
{
|
|
|
887953 |
- if (priv->quorum_reads)
|
|
|
887953 |
- return ENOTCONN;
|
|
|
887953 |
- return EROFS;
|
|
|
887953 |
+ return ENOTCONN;
|
|
|
887953 |
}
|
|
|
887953 |
|
|
|
887953 |
int
|
|
|
887953 |
@@ -1154,8 +1152,6 @@ afr_replies_interpret (call_frame_t *frame, xlator_t *this, inode_t *inode,
|
|
|
887953 |
return ret;
|
|
|
887953 |
}
|
|
|
887953 |
|
|
|
887953 |
-
|
|
|
887953 |
-
|
|
|
887953 |
int
|
|
|
887953 |
afr_refresh_selfheal_done (int ret, call_frame_t *heal, void *opaque)
|
|
|
887953 |
{
|
|
|
887953 |
@@ -1726,6 +1722,29 @@ afr_inode_read_subvol_type_get (inode_t *inode, xlator_t *this,
|
|
|
887953 |
return ret;
|
|
|
887953 |
}
|
|
|
887953 |
|
|
|
887953 |
+void
|
|
|
887953 |
+afr_readables_intersect_get (inode_t *inode, xlator_t *this, int *event,
|
|
|
887953 |
+ unsigned char *intersection)
|
|
|
887953 |
+{
|
|
|
887953 |
+ afr_private_t *priv = NULL;
|
|
|
887953 |
+ unsigned char *data_readable = NULL;
|
|
|
887953 |
+ unsigned char *metadata_readable = NULL;
|
|
|
887953 |
+ unsigned char *intersect = NULL;
|
|
|
887953 |
+
|
|
|
887953 |
+ priv = this->private;
|
|
|
887953 |
+ data_readable = alloca0 (priv->child_count);
|
|
|
887953 |
+ metadata_readable = alloca0 (priv->child_count);
|
|
|
887953 |
+ intersect = alloca0 (priv->child_count);
|
|
|
887953 |
+
|
|
|
887953 |
+ afr_inode_read_subvol_get (inode, this, data_readable,
|
|
|
887953 |
+ metadata_readable, event);
|
|
|
887953 |
+
|
|
|
887953 |
+ AFR_INTERSECT (intersect, data_readable, metadata_readable,
|
|
|
887953 |
+ priv->child_count);
|
|
|
887953 |
+ if (intersection)
|
|
|
887953 |
+ memcpy (intersection, intersect,
|
|
|
887953 |
+ sizeof (*intersection) * priv->child_count);
|
|
|
887953 |
+}
|
|
|
887953 |
|
|
|
887953 |
int
|
|
|
887953 |
afr_read_subvol_get (inode_t *inode, xlator_t *this, int *subvol_p,
|
|
|
887953 |
@@ -1734,8 +1753,6 @@ afr_read_subvol_get (inode_t *inode, xlator_t *this, int *subvol_p,
|
|
|
887953 |
afr_read_subvol_args_t *args)
|
|
|
887953 |
{
|
|
|
887953 |
afr_private_t *priv = NULL;
|
|
|
887953 |
- unsigned char *data_readable = NULL;
|
|
|
887953 |
- unsigned char *metadata_readable = NULL;
|
|
|
887953 |
unsigned char *readable = NULL;
|
|
|
887953 |
unsigned char *intersection = NULL;
|
|
|
887953 |
int subvol = -1;
|
|
|
887953 |
@@ -1744,17 +1761,11 @@ afr_read_subvol_get (inode_t *inode, xlator_t *this, int *subvol_p,
|
|
|
887953 |
priv = this->private;
|
|
|
887953 |
|
|
|
887953 |
readable = alloca0 (priv->child_count);
|
|
|
887953 |
- data_readable = alloca0 (priv->child_count);
|
|
|
887953 |
- metadata_readable = alloca0 (priv->child_count);
|
|
|
887953 |
intersection = alloca0 (priv->child_count);
|
|
|
887953 |
|
|
|
887953 |
afr_inode_read_subvol_type_get (inode, this, readable, &event, type);
|
|
|
887953 |
|
|
|
887953 |
- afr_inode_read_subvol_get (inode, this, data_readable, metadata_readable,
|
|
|
887953 |
- &event);
|
|
|
887953 |
-
|
|
|
887953 |
- AFR_INTERSECT (intersection, data_readable, metadata_readable,
|
|
|
887953 |
- priv->child_count);
|
|
|
887953 |
+ afr_readables_intersect_get (inode, this, &event, intersection);
|
|
|
887953 |
|
|
|
887953 |
if (AFR_COUNT (intersection, priv->child_count) > 0)
|
|
|
887953 |
subvol = afr_read_subvol_select_by_policy (inode, this,
|
|
|
887953 |
@@ -2188,18 +2199,28 @@ afr_get_parent_read_subvol (xlator_t *this, inode_t *parent,
|
|
|
887953 |
|
|
|
887953 |
int
|
|
|
887953 |
afr_read_subvol_decide (inode_t *inode, xlator_t *this,
|
|
|
887953 |
- afr_read_subvol_args_t *args)
|
|
|
887953 |
+ afr_read_subvol_args_t *args, unsigned char *readable)
|
|
|
887953 |
{
|
|
|
887953 |
- int data_subvol = -1;
|
|
|
887953 |
- int mdata_subvol = -1;
|
|
|
887953 |
+ int event = 0;
|
|
|
887953 |
+ afr_private_t *priv = NULL;
|
|
|
887953 |
+ unsigned char *intersection = NULL;
|
|
|
887953 |
+
|
|
|
887953 |
+ priv = this->private;
|
|
|
887953 |
+ intersection = alloca0 (priv->child_count);
|
|
|
887953 |
+
|
|
|
887953 |
+ afr_readables_intersect_get (inode, this, &event, intersection);
|
|
|
887953 |
|
|
|
887953 |
- data_subvol = afr_data_subvol_get (inode, this, NULL, NULL, NULL, args);
|
|
|
887953 |
- mdata_subvol = afr_metadata_subvol_get (inode, this,
|
|
|
887953 |
- NULL, NULL, NULL, args);
|
|
|
887953 |
- if (data_subvol == -1 || mdata_subvol == -1)
|
|
|
887953 |
+ if (AFR_COUNT (intersection, priv->child_count) <= 0) {
|
|
|
887953 |
+ /* TODO: If we have one brick with valid data_readable and
|
|
|
887953 |
+ * another with metadata_readable, try to send an iatt with
|
|
|
887953 |
+ * valid bits from both.*/
|
|
|
887953 |
return -1;
|
|
|
887953 |
+ }
|
|
|
887953 |
|
|
|
887953 |
- return data_subvol;
|
|
|
887953 |
+ memcpy (readable, intersection, sizeof (*readable) * priv->child_count);
|
|
|
887953 |
+
|
|
|
887953 |
+ return afr_read_subvol_select_by_policy (inode, this, intersection,
|
|
|
887953 |
+ args);
|
|
|
887953 |
}
|
|
|
887953 |
|
|
|
887953 |
static inline int
|
|
|
887953 |
@@ -2216,7 +2237,49 @@ afr_first_up_child (call_frame_t *frame, xlator_t *this)
|
|
|
887953 |
if (local->replies[i].valid &&
|
|
|
887953 |
local->replies[i].op_ret == 0)
|
|
|
887953 |
return i;
|
|
|
887953 |
- return 0;
|
|
|
887953 |
+ return -1;
|
|
|
887953 |
+}
|
|
|
887953 |
+
|
|
|
887953 |
+static void
|
|
|
887953 |
+afr_attempt_readsubvol_set (call_frame_t *frame, xlator_t *this,
|
|
|
887953 |
+ unsigned char *success_replies,
|
|
|
887953 |
+ unsigned char *data_readable, int *read_subvol)
|
|
|
887953 |
+{
|
|
|
887953 |
+ afr_private_t *priv = NULL;
|
|
|
887953 |
+ afr_local_t *local = NULL;
|
|
|
887953 |
+ int spb_choice = -1;
|
|
|
887953 |
+ int child_count = -1;
|
|
|
887953 |
+
|
|
|
887953 |
+ if (*read_subvol != -1)
|
|
|
887953 |
+ return;
|
|
|
887953 |
+
|
|
|
887953 |
+ priv = this->private;
|
|
|
887953 |
+ local = frame->local;
|
|
|
887953 |
+ child_count = priv->child_count;
|
|
|
887953 |
+
|
|
|
887953 |
+ afr_inode_split_brain_choice_get (local->inode, this,
|
|
|
887953 |
+ &spb_choice);
|
|
|
887953 |
+ if ((spb_choice >= 0) &&
|
|
|
887953 |
+ (AFR_COUNT(success_replies, child_count) == child_count)) {
|
|
|
887953 |
+ *read_subvol = spb_choice;
|
|
|
887953 |
+ } else if (!priv->quorum_count) {
|
|
|
887953 |
+ *read_subvol = afr_first_up_child (frame, this);
|
|
|
887953 |
+ } else if (priv->quorum_count &&
|
|
|
887953 |
+ afr_has_quorum (data_readable, this)) {
|
|
|
887953 |
+ /* read_subvol is guaranteed to be valid if we hit this path. */
|
|
|
887953 |
+ *read_subvol = afr_first_up_child (frame, this);
|
|
|
887953 |
+ } else {
|
|
|
887953 |
+ /* If quorum is enabled and we do not have a
|
|
|
887953 |
+ readable yet, it means all good copies are down.
|
|
|
887953 |
+ */
|
|
|
887953 |
+ local->op_ret = -1;
|
|
|
887953 |
+ local->op_errno = ENOTCONN;
|
|
|
887953 |
+ gf_msg (this->name, GF_LOG_WARNING, 0,
|
|
|
887953 |
+ AFR_MSG_READ_SUBVOL_ERROR, "no read "
|
|
|
887953 |
+ "subvols for %s", local->loc.path);
|
|
|
887953 |
+ }
|
|
|
887953 |
+ if (*read_subvol >= 0)
|
|
|
887953 |
+ dict_del (local->replies[*read_subvol].xdata, GF_CONTENT_KEY);
|
|
|
887953 |
}
|
|
|
887953 |
|
|
|
887953 |
static void
|
|
|
887953 |
@@ -2230,13 +2293,13 @@ afr_lookup_done (call_frame_t *frame, xlator_t *this)
|
|
|
887953 |
int par_read_subvol = 0;
|
|
|
887953 |
int ret = -1;
|
|
|
887953 |
unsigned char *readable = NULL;
|
|
|
887953 |
+ unsigned char *success_replies = NULL;
|
|
|
887953 |
int event = 0;
|
|
|
887953 |
struct afr_reply *replies = NULL;
|
|
|
887953 |
uuid_t read_gfid = {0, };
|
|
|
887953 |
gf_boolean_t locked_entry = _gf_false;
|
|
|
887953 |
gf_boolean_t can_interpret = _gf_true;
|
|
|
887953 |
inode_t *parent = NULL;
|
|
|
887953 |
- int spb_choice = -1;
|
|
|
887953 |
ia_type_t ia_type = IA_INVAL;
|
|
|
887953 |
afr_read_subvol_args_t args = {0,};
|
|
|
887953 |
char *gfid_heal_msg = NULL;
|
|
|
887953 |
@@ -2250,11 +2313,12 @@ afr_lookup_done (call_frame_t *frame, xlator_t *this)
|
|
|
887953 |
this);
|
|
|
887953 |
|
|
|
887953 |
readable = alloca0 (priv->child_count);
|
|
|
887953 |
+ success_replies = alloca0 (priv->child_count);
|
|
|
887953 |
|
|
|
887953 |
afr_inode_read_subvol_get (parent, this, readable, NULL, &event);
|
|
|
887953 |
+ par_read_subvol = afr_get_parent_read_subvol (this, parent, replies,
|
|
|
887953 |
+ readable);
|
|
|
887953 |
|
|
|
887953 |
- afr_inode_split_brain_choice_get (local->inode, this,
|
|
|
887953 |
- &spb_choice);
|
|
|
887953 |
/* First, check if we have a gfid-change from somewhere,
|
|
|
887953 |
If so, propagate that so that a fresh lookup can be
|
|
|
887953 |
issued
|
|
|
887953 |
@@ -2262,13 +2326,17 @@ afr_lookup_done (call_frame_t *frame, xlator_t *this)
|
|
|
887953 |
if (local->cont.lookup.needs_fresh_lookup) {
|
|
|
887953 |
local->op_ret = -1;
|
|
|
887953 |
local->op_errno = ESTALE;
|
|
|
887953 |
- goto unwind;
|
|
|
887953 |
+ goto error;
|
|
|
887953 |
}
|
|
|
887953 |
|
|
|
887953 |
op_errno = afr_final_errno (frame->local, this->private);
|
|
|
887953 |
local->op_errno = op_errno;
|
|
|
887953 |
|
|
|
887953 |
read_subvol = -1;
|
|
|
887953 |
+ for (i = 0; i < priv->child_count; i++)
|
|
|
887953 |
+ if (replies[i].valid && replies[i].op_ret == 0)
|
|
|
887953 |
+ success_replies[i] = 1;
|
|
|
887953 |
+
|
|
|
887953 |
for (i = 0; i < priv->child_count; i++) {
|
|
|
887953 |
if (!replies[i].valid)
|
|
|
887953 |
continue;
|
|
|
887953 |
@@ -2277,9 +2345,9 @@ afr_lookup_done (call_frame_t *frame, xlator_t *this)
|
|
|
887953 |
replies[i].op_errno == ENOENT) {
|
|
|
887953 |
/* Second, check entry is still
|
|
|
887953 |
"underway" in creation */
|
|
|
887953 |
- local->op_ret = -1;
|
|
|
887953 |
- local->op_errno = ENOENT;
|
|
|
887953 |
- goto unwind;
|
|
|
887953 |
+ local->op_ret = -1;
|
|
|
887953 |
+ local->op_errno = ENOENT;
|
|
|
887953 |
+ goto error;
|
|
|
887953 |
}
|
|
|
887953 |
|
|
|
887953 |
if (replies[i].op_ret == -1)
|
|
|
887953 |
@@ -2293,8 +2361,8 @@ afr_lookup_done (call_frame_t *frame, xlator_t *this)
|
|
|
887953 |
}
|
|
|
887953 |
}
|
|
|
887953 |
|
|
|
887953 |
- if (read_subvol == -1)
|
|
|
887953 |
- goto unwind;
|
|
|
887953 |
+ if (read_subvol == -1)
|
|
|
887953 |
+ goto error;
|
|
|
887953 |
/* We now have a read_subvol, which is readable[] (if there
|
|
|
887953 |
were any). Next we look for GFID mismatches. We don't
|
|
|
887953 |
consider a GFID mismatch as an error if read_subvol is
|
|
|
887953 |
@@ -2318,58 +2386,61 @@ afr_lookup_done (call_frame_t *frame, xlator_t *this)
|
|
|
887953 |
if (readable[read_subvol] && !readable[i])
|
|
|
887953 |
continue;
|
|
|
887953 |
|
|
|
887953 |
+ /* If we were called from glfsheal and there is still a gfid
|
|
|
887953 |
+ * mismatch, succeed the lookup and let glfsheal print the
|
|
|
887953 |
+ * response via gfid-heal-msg.*/
|
|
|
887953 |
+ if (!dict_get_str (local->xattr_req, "gfid-heal-msg",
|
|
|
887953 |
+ &gfid_heal_msg))
|
|
|
887953 |
+ goto cant_interpret;
|
|
|
887953 |
+
|
|
|
887953 |
/* LOG ERROR */
|
|
|
887953 |
local->op_ret = -1;
|
|
|
887953 |
local->op_errno = EIO;
|
|
|
887953 |
- goto unwind;
|
|
|
887953 |
+ goto error;
|
|
|
887953 |
}
|
|
|
887953 |
|
|
|
887953 |
/* Forth, for the finalized GFID, pick the best subvolume
|
|
|
887953 |
to return stats from.
|
|
|
887953 |
*/
|
|
|
887953 |
+ read_subvol = -1;
|
|
|
887953 |
+ memset (readable, 0, sizeof (*readable) * priv->child_count);
|
|
|
887953 |
if (can_interpret) {
|
|
|
887953 |
+ if (!afr_has_quorum (success_replies, this))
|
|
|
887953 |
+ goto cant_interpret;
|
|
|
887953 |
/* It is safe to call afr_replies_interpret() because we have
|
|
|
887953 |
a response from all the UP subvolumes and all of them resolved
|
|
|
887953 |
to the same GFID
|
|
|
887953 |
*/
|
|
|
887953 |
gf_uuid_copy (args.gfid, read_gfid);
|
|
|
887953 |
args.ia_type = ia_type;
|
|
|
887953 |
- if (afr_replies_interpret (frame, this, local->inode, NULL)) {
|
|
|
887953 |
- read_subvol = afr_read_subvol_decide (local->inode,
|
|
|
887953 |
- this, &args);
|
|
|
887953 |
+ ret = afr_replies_interpret (frame, this, local->inode, NULL);
|
|
|
887953 |
+ read_subvol = afr_read_subvol_decide (local->inode, this, &args,
|
|
|
887953 |
+ readable);
|
|
|
887953 |
+ if (read_subvol == -1)
|
|
|
887953 |
+ goto cant_interpret;
|
|
|
887953 |
+ if (ret) {
|
|
|
887953 |
afr_inode_event_gen_reset (local->inode, this);
|
|
|
887953 |
- goto cant_interpret;
|
|
|
887953 |
- } else {
|
|
|
887953 |
- read_subvol = afr_data_subvol_get (local->inode, this,
|
|
|
887953 |
- NULL, NULL, NULL, &args);
|
|
|
887953 |
- }
|
|
|
887953 |
+ dict_del (local->replies[read_subvol].xdata,
|
|
|
887953 |
+ GF_CONTENT_KEY);
|
|
|
887953 |
+ }
|
|
|
887953 |
} else {
|
|
|
887953 |
cant_interpret:
|
|
|
887953 |
+ afr_attempt_readsubvol_set (frame, this, success_replies,
|
|
|
887953 |
+ readable, &read_subvol);
|
|
|
887953 |
if (read_subvol == -1) {
|
|
|
887953 |
- if (spb_choice >= 0)
|
|
|
887953 |
- read_subvol = spb_choice;
|
|
|
887953 |
- else
|
|
|
887953 |
- read_subvol = afr_first_up_child (frame, this);
|
|
|
887953 |
+ goto error;
|
|
|
887953 |
}
|
|
|
887953 |
- dict_del (replies[read_subvol].xdata, GF_CONTENT_KEY);
|
|
|
887953 |
}
|
|
|
887953 |
|
|
|
887953 |
afr_handle_quota_size (frame, this);
|
|
|
887953 |
|
|
|
887953 |
-unwind:
|
|
|
887953 |
afr_set_need_heal (this, local);
|
|
|
887953 |
- if (read_subvol == -1) {
|
|
|
887953 |
- if (spb_choice >= 0)
|
|
|
887953 |
- read_subvol = spb_choice;
|
|
|
887953 |
- else
|
|
|
887953 |
- read_subvol = afr_first_up_child (frame, this);
|
|
|
887953 |
-
|
|
|
887953 |
- }
|
|
|
887953 |
- par_read_subvol = afr_get_parent_read_subvol (this, parent, replies,
|
|
|
887953 |
- readable);
|
|
|
887953 |
if (AFR_IS_ARBITER_BRICK (priv, read_subvol) && local->op_ret == 0) {
|
|
|
887953 |
- local->op_ret = -1;
|
|
|
887953 |
- local->op_errno = ENOTCONN;
|
|
|
887953 |
+ local->op_ret = -1;
|
|
|
887953 |
+ local->op_errno = ENOTCONN;
|
|
|
887953 |
+ gf_msg_debug(this->name, 0, "Arbiter cannot be a read subvol "
|
|
|
887953 |
+ "for %s", local->loc.path);
|
|
|
887953 |
+ goto error;
|
|
|
887953 |
}
|
|
|
887953 |
|
|
|
887953 |
ret = dict_get_str (local->xattr_req, "gfid-heal-msg", &gfid_heal_msg);
|
|
|
887953 |
@@ -2389,6 +2460,11 @@ unwind:
|
|
|
887953 |
local->inode, &local->replies[read_subvol].poststat,
|
|
|
887953 |
local->replies[read_subvol].xdata,
|
|
|
887953 |
&local->replies[par_read_subvol].postparent);
|
|
|
887953 |
+ return;
|
|
|
887953 |
+
|
|
|
887953 |
+error:
|
|
|
887953 |
+ AFR_STACK_UNWIND (lookup, frame, local->op_ret, local->op_errno, NULL,
|
|
|
887953 |
+ NULL, NULL, NULL);
|
|
|
887953 |
}
|
|
|
887953 |
|
|
|
887953 |
/*
|
|
|
887953 |
@@ -2904,55 +2980,54 @@ afr_discover_done (call_frame_t *frame, xlator_t *this)
|
|
|
887953 |
afr_local_t *local = NULL;
|
|
|
887953 |
int i = -1;
|
|
|
887953 |
int op_errno = 0;
|
|
|
887953 |
- int spb_choice = -1;
|
|
|
887953 |
int read_subvol = -1;
|
|
|
887953 |
+ unsigned char *data_readable = NULL;
|
|
|
887953 |
+ unsigned char *success_replies = NULL;
|
|
|
887953 |
|
|
|
887953 |
priv = this->private;
|
|
|
887953 |
local = frame->local;
|
|
|
887953 |
-
|
|
|
887953 |
- afr_inode_split_brain_choice_get (local->inode, this,
|
|
|
887953 |
- &spb_choice);
|
|
|
887953 |
+ data_readable = alloca0 (priv->child_count);
|
|
|
887953 |
+ success_replies = alloca0 (priv->child_count);
|
|
|
887953 |
|
|
|
887953 |
for (i = 0; i < priv->child_count; i++) {
|
|
|
887953 |
if (!local->replies[i].valid)
|
|
|
887953 |
continue;
|
|
|
887953 |
- if (local->replies[i].op_ret == 0)
|
|
|
887953 |
+ if (local->replies[i].op_ret == 0) {
|
|
|
887953 |
+ success_replies[i] = 1;
|
|
|
887953 |
local->op_ret = 0;
|
|
|
887953 |
+ }
|
|
|
887953 |
}
|
|
|
887953 |
|
|
|
887953 |
op_errno = afr_final_errno (frame->local, this->private);
|
|
|
887953 |
|
|
|
887953 |
if (local->op_ret < 0) {
|
|
|
887953 |
- local->op_errno = op_errno;
|
|
|
887953 |
- local->op_ret = -1;
|
|
|
887953 |
- goto unwind;
|
|
|
887953 |
+ AFR_STACK_UNWIND (lookup, frame, -1, op_errno, NULL, NULL,
|
|
|
887953 |
+ NULL, NULL);
|
|
|
887953 |
+ return;
|
|
|
887953 |
}
|
|
|
887953 |
|
|
|
887953 |
- afr_replies_interpret (frame, this, local->inode, NULL);
|
|
|
887953 |
+ if (!afr_has_quorum (success_replies, this))
|
|
|
887953 |
+ goto unwind;
|
|
|
887953 |
|
|
|
887953 |
- read_subvol = afr_read_subvol_decide (local->inode, this, NULL);
|
|
|
887953 |
- if (read_subvol == -1) {
|
|
|
887953 |
- gf_msg (this->name, GF_LOG_WARNING, 0,
|
|
|
887953 |
- AFR_MSG_READ_SUBVOL_ERROR, "no read subvols for %s",
|
|
|
887953 |
- local->loc.path);
|
|
|
887953 |
+ afr_replies_interpret (frame, this, local->inode, NULL);
|
|
|
887953 |
|
|
|
887953 |
- if (spb_choice >= 0) {
|
|
|
887953 |
- read_subvol = spb_choice;
|
|
|
887953 |
- } else {
|
|
|
887953 |
- read_subvol = afr_first_up_child (frame, this);
|
|
|
887953 |
- }
|
|
|
887953 |
- }
|
|
|
887953 |
+ read_subvol = afr_read_subvol_decide (local->inode, this, NULL,
|
|
|
887953 |
+ data_readable);
|
|
|
887953 |
|
|
|
887953 |
unwind:
|
|
|
887953 |
+ afr_attempt_readsubvol_set (frame, this, success_replies, data_readable,
|
|
|
887953 |
+ &read_subvol);
|
|
|
887953 |
if (read_subvol == -1) {
|
|
|
887953 |
- if (spb_choice >= 0)
|
|
|
887953 |
- read_subvol = spb_choice;
|
|
|
887953 |
- else
|
|
|
887953 |
- read_subvol = afr_first_up_child (frame, this);
|
|
|
887953 |
+ AFR_STACK_UNWIND (lookup, frame, local->op_ret, local->op_errno,
|
|
|
887953 |
+ NULL, NULL, NULL, NULL);
|
|
|
887953 |
+ return;
|
|
|
887953 |
}
|
|
|
887953 |
+
|
|
|
887953 |
if (AFR_IS_ARBITER_BRICK (priv, read_subvol) && local->op_ret == 0) {
|
|
|
887953 |
- local->op_ret = -1;
|
|
|
887953 |
- local->op_errno = ENOTCONN;
|
|
|
887953 |
+ local->op_ret = -1;
|
|
|
887953 |
+ local->op_errno = ENOTCONN;
|
|
|
887953 |
+ gf_msg_debug (this->name, 0, "Arbiter cannot be a read subvol "
|
|
|
887953 |
+ "for %s", local->loc.path);
|
|
|
887953 |
}
|
|
|
887953 |
|
|
|
887953 |
AFR_STACK_UNWIND (lookup, frame, local->op_ret, local->op_errno,
|
|
|
887953 |
@@ -4646,7 +4721,6 @@ afr_priv_dump (xlator_t *this)
|
|
|
887953 |
gf_proc_dump_write("read_child", "%d", priv->read_child);
|
|
|
887953 |
gf_proc_dump_write("favorite_child", "%d", priv->favorite_child);
|
|
|
887953 |
gf_proc_dump_write("wait_count", "%u", priv->wait_count);
|
|
|
887953 |
- gf_proc_dump_write("quorum-reads", "%d", priv->quorum_reads);
|
|
|
887953 |
gf_proc_dump_write("heal-wait-queue-length", "%d",
|
|
|
887953 |
priv->heal_wait_qlen);
|
|
|
887953 |
gf_proc_dump_write("heal-waiters", "%d", priv->heal_waiters);
|
|
|
887953 |
diff --git a/xlators/cluster/afr/src/afr-read-txn.c b/xlators/cluster/afr/src/afr-read-txn.c
|
|
|
887953 |
index 50e8040..f6c491b 100644
|
|
|
887953 |
--- a/xlators/cluster/afr/src/afr-read-txn.c
|
|
|
887953 |
+++ b/xlators/cluster/afr/src/afr-read-txn.c
|
|
|
887953 |
@@ -193,8 +193,7 @@ afr_read_txn (call_frame_t *frame, xlator_t *this, inode_t *inode,
|
|
|
887953 |
local->inode = inode_ref (inode);
|
|
|
887953 |
local->is_read_txn = _gf_true;
|
|
|
887953 |
|
|
|
887953 |
- if (priv->quorum_reads &&
|
|
|
887953 |
- priv->quorum_count && !afr_has_quorum (priv->child_up, this)) {
|
|
|
887953 |
+ if (priv->quorum_count && !afr_has_quorum (local->child_up, this)) {
|
|
|
887953 |
local->op_ret = -1;
|
|
|
887953 |
local->op_errno = ENOTCONN;
|
|
|
887953 |
read_subvol = -1;
|
|
|
887953 |
diff --git a/xlators/cluster/afr/src/afr.c b/xlators/cluster/afr/src/afr.c
|
|
|
887953 |
index 0122b7f..1b738c0 100644
|
|
|
887953 |
--- a/xlators/cluster/afr/src/afr.c
|
|
|
887953 |
+++ b/xlators/cluster/afr/src/afr.c
|
|
|
887953 |
@@ -267,8 +267,6 @@ reconfigure (xlator_t *this, dict_t *options)
|
|
|
887953 |
GF_OPTION_RECONF ("heal-timeout", priv->shd.timeout, options,
|
|
|
887953 |
int32, out);
|
|
|
887953 |
|
|
|
887953 |
- GF_OPTION_RECONF ("quorum-reads", priv->quorum_reads, options,
|
|
|
887953 |
- bool, out);
|
|
|
887953 |
GF_OPTION_RECONF ("consistent-metadata", priv->consistent_metadata,
|
|
|
887953 |
options, bool, out);
|
|
|
887953 |
|
|
|
887953 |
@@ -531,7 +529,6 @@ init (xlator_t *this)
|
|
|
887953 |
GF_OPTION_INIT ("iam-self-heal-daemon", priv->shd.iamshd, bool, out);
|
|
|
887953 |
GF_OPTION_INIT ("heal-timeout", priv->shd.timeout, int32, out);
|
|
|
887953 |
|
|
|
887953 |
- GF_OPTION_INIT ("quorum-reads", priv->quorum_reads, bool, out);
|
|
|
887953 |
GF_OPTION_INIT ("consistent-metadata", priv->consistent_metadata, bool,
|
|
|
887953 |
out);
|
|
|
887953 |
GF_OPTION_INIT ("consistent-io", priv->consistent_io, bool, out);
|
|
|
887953 |
@@ -965,8 +962,8 @@ struct volume_options options[] = {
|
|
|
887953 |
{ .key = {"quorum-reads"},
|
|
|
887953 |
.type = GF_OPTION_TYPE_BOOL,
|
|
|
887953 |
.default_value = "no",
|
|
|
887953 |
- .description = "If quorum-reads is \"true\" only allow reads if "
|
|
|
887953 |
- "quorum is met when quorum is enabled.",
|
|
|
887953 |
+ .description = "This option has been removed. Reads are not allowed "
|
|
|
887953 |
+ "if quorum is not met.",
|
|
|
887953 |
},
|
|
|
887953 |
{ .key = {"node-uuid"},
|
|
|
887953 |
.type = GF_OPTION_TYPE_STR,
|
|
|
887953 |
diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h
|
|
|
887953 |
index af9dbc8..7010e9b 100644
|
|
|
887953 |
--- a/xlators/cluster/afr/src/afr.h
|
|
|
887953 |
+++ b/xlators/cluster/afr/src/afr.h
|
|
|
887953 |
@@ -131,7 +131,6 @@ typedef struct _afr_private {
|
|
|
887953 |
gf_boolean_t pre_op_compat; /* on/off */
|
|
|
887953 |
uint32_t post_op_delay_secs;
|
|
|
887953 |
unsigned int quorum_count;
|
|
|
887953 |
- gf_boolean_t quorum_reads;
|
|
|
887953 |
|
|
|
887953 |
char vol_uuid[UUID_SIZE + 1];
|
|
|
887953 |
int32_t *last_event;
|
|
|
887953 |
--
|
|
|
887953 |
1.8.3.1
|
|
|
887953 |
|