21ab4e
From a1b0b250e4080bcb1c5dc7b263c1eca26e772152 Mon Sep 17 00:00:00 2001
21ab4e
From: Ravishankar N <ravishankar@redhat.com>
21ab4e
Date: Sun, 2 Apr 2017 18:08:04 +0530
21ab4e
Subject: [PATCH 388/393] afr: don't do a post-op on a brick if op failed
21ab4e
21ab4e
Problem:
21ab4e
In afr-v2, self-blaming xattrs are not there by design. But if the FOP
21ab4e
failed on a brick due to an error other than ENOTCONN (or even due to
21ab4e
ENOTCONN, but we regained connection before postop was wound), we wind
21ab4e
the post-op also on the failed brick, leading to setting self-blaming
21ab4e
xattrs on that brick. This can lead to undesired results like healing of
21ab4e
files in split-brain etc.
21ab4e
21ab4e
Fix:
21ab4e
If a fop failed on a brick on which pre-op was successful, do not
21ab4e
perform post-op on it. This also produces the desired effect of not
21ab4e
resetting the dirty xattr on the brick, which is how it should be
21ab4e
because if the fop failed on a brick, there is no reason to clear the
21ab4e
dirty bit which actually serves as an indication of the failure.
21ab4e
21ab4e
> Reviewed-on: https://review.gluster.org/16976
21ab4e
> Smoke: Gluster Build System <jenkins@build.gluster.org>
21ab4e
> NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
21ab4e
> CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
21ab4e
> Reviewed-by: Pranith Kumar Karampuri <pkarampu@redhat.com>
21ab4e
21ab4e
Change-Id: Ia27a97646d11cd73592031027e6b0b6d412927c5
21ab4e
BUG: 1394118
21ab4e
Signed-off-by: Ravishankar N <ravishankar@redhat.com>
21ab4e
Reviewed-on: https://code.engineering.redhat.com/gerrit/103750
21ab4e
Reviewed-by: Karthik Subrahmanya <ksubrahm@redhat.com>
21ab4e
Reviewed-by: Pranith Kumar Karampuri <pkarampu@redhat.com>
21ab4e
---
21ab4e
 .../bug-1438255-do-not-mark-self-accusing-xattrs.t | 46 ++++++++++++++++++++++
21ab4e
 xlators/cluster/afr/src/afr-transaction.c          | 21 +++++++---
21ab4e
 2 files changed, 61 insertions(+), 6 deletions(-)
21ab4e
 create mode 100644 tests/bugs/replicate/bug-1438255-do-not-mark-self-accusing-xattrs.t
21ab4e
21ab4e
diff --git a/tests/bugs/replicate/bug-1438255-do-not-mark-self-accusing-xattrs.t b/tests/bugs/replicate/bug-1438255-do-not-mark-self-accusing-xattrs.t
21ab4e
new file mode 100644
21ab4e
index 0000000..edfd0d7
21ab4e
--- /dev/null
21ab4e
+++ b/tests/bugs/replicate/bug-1438255-do-not-mark-self-accusing-xattrs.t
21ab4e
@@ -0,0 +1,46 @@
21ab4e
+#!/bin/bash
21ab4e
+. $(dirname $0)/../../include.rc
21ab4e
+. $(dirname $0)/../../volume.rc
21ab4e
+cleanup;
21ab4e
+
21ab4e
+NEW_USER=bug1438255
21ab4e
+NEW_UID=1438255
21ab4e
+NEW_GID=1438255
21ab4e
+
21ab4e
+TEST groupadd -o -g ${NEW_GID} ${NEW_USER}-${NEW_GID}
21ab4e
+TEST useradd -o -M -u ${NEW_UID} -g ${NEW_GID} -K MAIL_DIR=/dev/null ${NEW_USER}
21ab4e
+
21ab4e
+TEST glusterd
21ab4e
+TEST pidof glusterd
21ab4e
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}
21ab4e
+TEST $CLI volume set $V0 cluster.self-heal-daemon off
21ab4e
+TEST $CLI volume set $V0 cluster.data-self-heal off
21ab4e
+TEST $CLI volume set $V0 cluster.metadata-self-heal off
21ab4e
+TEST $CLI volume set $V0 cluster.entry-self-heal off
21ab4e
+
21ab4e
+TEST $CLI volume start $V0
21ab4e
+EXPECT 'Started' volinfo_field $V0 'Status'
21ab4e
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0
21ab4e
+
21ab4e
+TEST touch $M0/FILE
21ab4e
+TEST kill_brick $V0 $H0 $B0/${V0}2
21ab4e
+chown $NEW_UID:$NEW_GID $M0/FILE
21ab4e
+EXPECT "000000000000000100000000" get_hex_xattr trusted.afr.$V0-client-2 $B0/${V0}0/FILE
21ab4e
+EXPECT "000000000000000100000000" get_hex_xattr trusted.afr.$V0-client-2 $B0/${V0}1/FILE
21ab4e
+TEST $CLI volume start $V0 force
21ab4e
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 2
21ab4e
+
21ab4e
+# setfattr done as NEW_USER fails on 3rd brick with EPERM but suceeds on
21ab4e
+# the first 2 and hence on the mount.
21ab4e
+su -m bug1438255 -c "setfattr -n user.myattr -v myvalue  $M0/FILE"
21ab4e
+TEST [ $? -eq 0 ]
21ab4e
+EXPECT "000000000000000200000000" get_hex_xattr trusted.afr.$V0-client-2 $B0/${V0}0/FILE
21ab4e
+EXPECT "000000000000000200000000" get_hex_xattr trusted.afr.$V0-client-2 $B0/${V0}1/FILE
21ab4e
+# Brick 3 does not have any self-blaming pending xattr.
21ab4e
+TEST ! getfattr -n trusted.afr.$V0-client-2 $B0/${V0}2/FILE
21ab4e
+
21ab4e
+TEST userdel --force ${NEW_USER}
21ab4e
+TEST groupdel ${NEW_USER}-${NEW_GID}
21ab4e
+cleanup
21ab4e
+
21ab4e
+
21ab4e
diff --git a/xlators/cluster/afr/src/afr-transaction.c b/xlators/cluster/afr/src/afr-transaction.c
21ab4e
index 75999b5..a3cb766 100644
21ab4e
--- a/xlators/cluster/afr/src/afr-transaction.c
21ab4e
+++ b/xlators/cluster/afr/src/afr-transaction.c
21ab4e
@@ -35,6 +35,7 @@ afr_changelog_pre_op_update (call_frame_t *frame, xlator_t *this);
21ab4e
 int
21ab4e
 afr_changelog_call_count (afr_transaction_type type,
21ab4e
                           unsigned char *pre_op_subvols,
21ab4e
+                          unsigned char *failed_subvols,
21ab4e
                           unsigned int child_count);
21ab4e
 int
21ab4e
 afr_post_op_unlock_do (call_frame_t *frame, xlator_t *this, dict_t *xattr,
21ab4e
@@ -589,11 +590,17 @@ afr_locked_nodes_get (afr_transaction_type type, afr_internal_lock_t *int_lock)
21ab4e
 int
21ab4e
 afr_changelog_call_count (afr_transaction_type type,
21ab4e
 			  unsigned char *pre_op_subvols,
21ab4e
+                          unsigned char *failed_subvols,
21ab4e
 			  unsigned int child_count)
21ab4e
 {
21ab4e
+        int i = 0;
21ab4e
         int call_count = 0;
21ab4e
 
21ab4e
-	call_count = AFR_COUNT(pre_op_subvols, child_count);
21ab4e
+        for (i = 0; i < child_count; i++) {
21ab4e
+                if (pre_op_subvols[i] && !failed_subvols[i]) {
21ab4e
+                        call_count++;
21ab4e
+                }
21ab4e
+        }
21ab4e
 
21ab4e
         if (type == AFR_ENTRY_RENAME_TRANSACTION)
21ab4e
                 call_count *= 2;
21ab4e
@@ -1334,6 +1341,7 @@ afr_changelog_prepare (xlator_t *this, call_frame_t *frame, int *call_count,
21ab4e
 
21ab4e
         *call_count = afr_changelog_call_count (local->transaction.type,
21ab4e
                                                local->transaction.pre_op,
21ab4e
+                                              local->transaction.failed_subvols,
21ab4e
                                                priv->child_count);
21ab4e
 
21ab4e
         if (*call_count == 0) {
21ab4e
@@ -1400,7 +1408,8 @@ afr_pre_op_fop_do (call_frame_t *frame, xlator_t *this, dict_t *xattr,
21ab4e
 
21ab4e
         for (i = 0; i < priv->child_count; i++) {
21ab4e
                 /* Means lock did not succeed on this brick */
21ab4e
-                if (!local->transaction.pre_op[i])
21ab4e
+                if (!local->transaction.pre_op[i] ||
21ab4e
+                    local->transaction.failed_subvols[i])
21ab4e
                         continue;
21ab4e
 
21ab4e
                 STACK_WIND_COOKIE (frame, compound_cbk,
21ab4e
@@ -1549,9 +1558,8 @@ afr_post_op_unlock_do (call_frame_t *frame, xlator_t *this, dict_t *xattr,
21ab4e
         local->c_args = args;
21ab4e
 
21ab4e
         for (i = 0; i < priv->child_count; i++) {
21ab4e
-                /* pre_op[i] has to be true for all nodes that were
21ab4e
-                 * successfully locked. */
21ab4e
-                if (!local->transaction.pre_op[i])
21ab4e
+                if (!local->transaction.pre_op[i] ||
21ab4e
+                    local->transaction.failed_subvols[i])
21ab4e
                         continue;
21ab4e
                 STACK_WIND_COOKIE (frame, afr_post_op_unlock_cbk,
21ab4e
                                    (void *) (long) i,
21ab4e
@@ -1593,7 +1601,8 @@ afr_changelog_do (call_frame_t *frame, xlator_t *this, dict_t *xattr,
21ab4e
                 return 0;
21ab4e
 
21ab4e
         for (i = 0; i < priv->child_count; i++) {
21ab4e
-                if (!local->transaction.pre_op[i])
21ab4e
+                if (!local->transaction.pre_op[i] ||
21ab4e
+                    local->transaction.failed_subvols[i])
21ab4e
                         continue;
21ab4e
 
21ab4e
                 switch (local->transaction.type) {
21ab4e
-- 
21ab4e
1.8.3.1
21ab4e