d1681e
From 3d79f49f2c7752f8f43a35563f7a1253c901db60 Mon Sep 17 00:00:00 2001
d1681e
From: Ravishankar N <ravishankar@redhat.com>
d1681e
Date: Tue, 27 Mar 2018 20:54:25 +0530
d1681e
Subject: [PATCH 227/236] afr: add quorum checks in pre-op
d1681e
d1681e
Upstream patch: https://review.gluster.org/#/c/19781/
d1681e
d1681e
Problem:
d1681e
We seem to be winding the FOP even if pre-op did not succeed on quorum bricks
d1681e
and then failing the FOP with EROFS since the fop did not meet quorum.
d1681e
This essentially masks the actual error due to which pre-op failed. (See
d1681e
BZ 1554291).
d1681e
d1681e
Fix:
d1681e
Skip the FOP phase if pre-op quorum is not met and go to post-op.
d1681e
d1681e
Change-Id: Ie58a41e8fa1ad79aa06093706e96db8eef61b6d9
d1681e
BUG: 1554291
d1681e
Signed-off-by: Ravishankar N <ravishankar@redhat.com>
d1681e
Reviewed-on: https://code.engineering.redhat.com/gerrit/136227
d1681e
Tested-by: RHGS Build Bot <nigelb@redhat.com>
d1681e
Reviewed-by: Pranith Kumar Karampuri <pkarampu@redhat.com>
d1681e
---
d1681e
 xlators/cluster/afr/src/afr-transaction.c | 64 +++++++++++++++----------------
d1681e
 1 file changed, 31 insertions(+), 33 deletions(-)
d1681e
d1681e
diff --git a/xlators/cluster/afr/src/afr-transaction.c b/xlators/cluster/afr/src/afr-transaction.c
d1681e
index 993029d..88dc821 100644
d1681e
--- a/xlators/cluster/afr/src/afr-transaction.c
d1681e
+++ b/xlators/cluster/afr/src/afr-transaction.c
d1681e
@@ -144,6 +144,29 @@ afr_needs_changelog_update (afr_local_t *local)
d1681e
         return _gf_false;
d1681e
 }
d1681e
 
d1681e
+gf_boolean_t
d1681e
+afr_changelog_has_quorum (afr_local_t *local, xlator_t *this)
d1681e
+{
d1681e
+        afr_private_t *priv = NULL;
d1681e
+        int i = 0;
d1681e
+        unsigned char *success_children = NULL;
d1681e
+
d1681e
+        priv = this->private;
d1681e
+        success_children = alloca0 (priv->child_count);
d1681e
+
d1681e
+        for (i = 0; i < priv->child_count; i++) {
d1681e
+                if (!local->transaction.failed_subvols[i]) {
d1681e
+                        success_children[i] = 1;
d1681e
+                }
d1681e
+        }
d1681e
+
d1681e
+        if (afr_has_quorum (success_children, this)) {
d1681e
+                return _gf_true;
d1681e
+        }
d1681e
+
d1681e
+        return _gf_false;
d1681e
+}
d1681e
+
d1681e
 int
d1681e
 afr_transaction_fop (call_frame_t *frame, xlator_t *this)
d1681e
 {
d1681e
@@ -157,17 +180,16 @@ afr_transaction_fop (call_frame_t *frame, xlator_t *this)
d1681e
         priv = this->private;
d1681e
 
d1681e
         failed_subvols = local->transaction.failed_subvols;
d1681e
-
d1681e
         call_count = priv->child_count - AFR_COUNT (failed_subvols,
d1681e
                                                     priv->child_count);
d1681e
-
d1681e
-        if (call_count == 0) {
d1681e
+        /* Fail if pre-op did not succeed on quorum no. of bricks. */
d1681e
+        if (!afr_changelog_has_quorum (local, this) || !call_count) {
d1681e
+                local->op_ret = -1;
d1681e
+                /* local->op_errno is already captured in changelog cbk. */
d1681e
                 afr_transaction_resume (frame, this);
d1681e
                 return 0;
d1681e
         }
d1681e
-
d1681e
         local->call_count = call_count;
d1681e
-
d1681e
         for (i = 0; i < priv->child_count; i++) {
d1681e
                 if (local->transaction.pre_op[i] && !failed_subvols[i]) {
d1681e
 			local->transaction.wind (frame, this, i);
d1681e
@@ -531,33 +553,6 @@ afr_set_pending_dict (afr_private_t *priv, dict_t *xattr, int **pending)
d1681e
 
d1681e
 /* {{{ pending */
d1681e
 
d1681e
-
d1681e
-void
d1681e
-afr_handle_post_op_quorum (afr_local_t *local, xlator_t *this)
d1681e
-{
d1681e
-        afr_private_t *priv = NULL;
d1681e
-        int i = 0;
d1681e
-        unsigned char *post_op_children = NULL;
d1681e
-
d1681e
-        priv = this->private;
d1681e
-        post_op_children = alloca0 (priv->child_count);
d1681e
-
d1681e
-        for (i = 0; i < priv->child_count; i++) {
d1681e
-                if (!local->transaction.failed_subvols[i]) {
d1681e
-                        post_op_children[i] = 1;
d1681e
-                }
d1681e
-        }
d1681e
-
d1681e
-        if (afr_has_quorum (post_op_children, this)) {
d1681e
-                return;
d1681e
-        }
d1681e
-
d1681e
-        local->op_ret = -1;
d1681e
-        /*local->op_errno is already captured in post-op callback.*/
d1681e
-
d1681e
-        return;
d1681e
-}
d1681e
-
d1681e
 int
d1681e
 afr_changelog_post_op_done (call_frame_t *frame, xlator_t *this)
d1681e
 {
d1681e
@@ -568,7 +563,10 @@ afr_changelog_post_op_done (call_frame_t *frame, xlator_t *this)
d1681e
         int_lock = &local->internal_lock;
d1681e
 
d1681e
         /* Fail the FOP if post-op did not succeed on quorum no. of bricks. */
d1681e
-        afr_handle_post_op_quorum (local, this);
d1681e
+        if (!afr_changelog_has_quorum (local, this)) {
d1681e
+                local->op_ret = -1;
d1681e
+                /*local->op_errno is already captured in changelog cbk*/
d1681e
+        }
d1681e
 
d1681e
 	if (local->transaction.resume_stub) {
d1681e
 		call_resume (local->transaction.resume_stub);
d1681e
-- 
d1681e
1.8.3.1
d1681e