From 3d79f49f2c7752f8f43a35563f7a1253c901db60 Mon Sep 17 00:00:00 2001 From: Ravishankar N Date: Tue, 27 Mar 2018 20:54:25 +0530 Subject: [PATCH 227/236] afr: add quorum checks in pre-op Upstream patch: https://review.gluster.org/#/c/19781/ Problem: We seem to be winding the FOP if pre-op did not succeed on quorum bricks and then failing the FOP with EROFS since the fop did not meet quorum. This essentially masks the actual error due to which pre-op failed. (See BZ). Fix: Skip FOP phase if pre-op quorum is not met and go to post-op. Change-Id: Ie58a41e8fa1ad79aa06093706e96db8eef61b6d9 BUG: 1554291 Signed-off-by: Ravishankar N Reviewed-on: https://code.engineering.redhat.com/gerrit/136227 Tested-by: RHGS Build Bot Reviewed-by: Pranith Kumar Karampuri --- xlators/cluster/afr/src/afr-transaction.c | 64 +++++++++++++++---------------- 1 file changed, 31 insertions(+), 33 deletions(-) diff --git a/xlators/cluster/afr/src/afr-transaction.c b/xlators/cluster/afr/src/afr-transaction.c index 993029d..88dc821 100644 --- a/xlators/cluster/afr/src/afr-transaction.c +++ b/xlators/cluster/afr/src/afr-transaction.c @@ -144,6 +144,29 @@ afr_needs_changelog_update (afr_local_t *local) return _gf_false; } +gf_boolean_t +afr_changelog_has_quorum (afr_local_t *local, xlator_t *this) +{ + afr_private_t *priv = NULL; + int i = 0; + unsigned char *success_children = NULL; + + priv = this->private; + success_children = alloca0 (priv->child_count); + + for (i = 0; i < priv->child_count; i++) { + if (!local->transaction.failed_subvols[i]) { + success_children[i] = 1; + } + } + + if (afr_has_quorum (success_children, this)) { + return _gf_true; + } + + return _gf_false; +} + int afr_transaction_fop (call_frame_t *frame, xlator_t *this) { @@ -157,17 +180,16 @@ afr_transaction_fop (call_frame_t *frame, xlator_t *this) priv = this->private; failed_subvols = local->transaction.failed_subvols; - call_count = priv->child_count - AFR_COUNT (failed_subvols, priv->child_count); - - if (call_count == 0) { + /* Fail if pre-op did not succeed on quorum no. of bricks. */ + if (!afr_changelog_has_quorum (local, this) || !call_count) { + local->op_ret = -1; + /* local->op_errno is already captured in changelog cbk. */ afr_transaction_resume (frame, this); return 0; } - local->call_count = call_count; - for (i = 0; i < priv->child_count; i++) { if (local->transaction.pre_op[i] && !failed_subvols[i]) { local->transaction.wind (frame, this, i); @@ -531,33 +553,6 @@ afr_set_pending_dict (afr_private_t *priv, dict_t *xattr, int **pending) /* {{{ pending */ - -void -afr_handle_post_op_quorum (afr_local_t *local, xlator_t *this) -{ - afr_private_t *priv = NULL; - int i = 0; - unsigned char *post_op_children = NULL; - - priv = this->private; - post_op_children = alloca0 (priv->child_count); - - for (i = 0; i < priv->child_count; i++) { - if (!local->transaction.failed_subvols[i]) { - post_op_children[i] = 1; - } - } - - if (afr_has_quorum (post_op_children, this)) { - return; - } - - local->op_ret = -1; - /*local->op_errno is already captured in post-op callback.*/ - - return; -} - int afr_changelog_post_op_done (call_frame_t *frame, xlator_t *this) { @@ -568,7 +563,10 @@ afr_changelog_post_op_done (call_frame_t *frame, xlator_t *this) int_lock = &local->internal_lock; /* Fail the FOP if post-op did not succeed on quorum no. of bricks. */ - afr_handle_post_op_quorum (local, this); + if (!afr_changelog_has_quorum (local, this)) { + local->op_ret = -1; + /*local->op_errno is already captured in changelog cbk*/ + } if (local->transaction.resume_stub) { call_resume (local->transaction.resume_stub); -- 1.8.3.1