Blob Blame History Raw
From 8c6bf26a112521284a62082f5796df8f9b504f54 Mon Sep 17 00:00:00 2001
From: karthik-us <ksubrahm@redhat.com>
Date: Fri, 31 Mar 2017 15:17:42 +0530
Subject: [PATCH 369/369] cluster/afr: Undo pending xattrs only on the up
 bricks

Problem:
While doing conservative merge, even if a brick is down, it will reset
the pending xattr on that. When that brick comes up, as part of the
heal, it will consider this brick as the source and removes the entries
on the other bricks, which leads to data loss.

Fix: Undo pending only for the bricks which are up.

> Reviewed-on: https://review.gluster.org/16913
> Reviewed-by: Pranith Kumar Karampuri <pkarampu@redhat.com>
> Smoke: Gluster Build System <jenkins@build.gluster.org>
> NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
> CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
> Reviewed-by: Ravishankar N <ravishankar@redhat.com>

Change-Id: Ie166ac2e738f0b231b42082855f3c1c57797e7a7
BUG: 1437773
Signed-off-by: karthik-us <ksubrahm@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/102101
Reviewed-by: Ravishankar Narayanankutty <ravishankar@redhat.com>
Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
---
 .../bug-1433571-undo-pending-only-on-up-bricks.t   | 89 ++++++++++++++++++++++
 xlators/cluster/afr/src/afr-self-heal-common.c     |  2 +-
 2 files changed, 90 insertions(+), 1 deletion(-)
 create mode 100644 tests/bugs/replicate/bug-1433571-undo-pending-only-on-up-bricks.t

diff --git a/tests/bugs/replicate/bug-1433571-undo-pending-only-on-up-bricks.t b/tests/bugs/replicate/bug-1433571-undo-pending-only-on-up-bricks.t
new file mode 100644
index 0000000..271abb4
--- /dev/null
+++ b/tests/bugs/replicate/bug-1433571-undo-pending-only-on-up-bricks.t
@@ -0,0 +1,89 @@
+#!/bin/bash
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}
+TEST $CLI volume start $V0
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0;
+
+# Disable self-heal-daemon, client-side-heal and set quorum-type to none
+TEST $CLI volume set $V0 cluster.self-heal-daemon off
+TEST $CLI volume set $V0 cluster.data-self-heal off
+TEST $CLI volume set $V0 cluster.metadata-self-heal off
+TEST $CLI volume set $V0 cluster.entry-self-heal off
+TEST $CLI volume set $V0 cluster.quorum-type none
+
+#Kill bricks 0 & 1 and create a file to have pending entry for 0 & 1 on brick 2
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST kill_brick $V0 $H0 $B0/${V0}1
+echo "file 1" >> $M0/f1
+EXPECT "000000000000000000000001" get_hex_xattr trusted.afr.$V0-client-0 $B0/${V0}2
+EXPECT "000000000000000000000001" get_hex_xattr trusted.afr.$V0-client-1 $B0/${V0}2
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1
+
+#Kill bricks 1 & 2 and create a file to have pending entry for 1 & 2 on brick 0
+TEST kill_brick $V0 $H0 $B0/${V0}1
+TEST kill_brick $V0 $H0 $B0/${V0}2
+echo "file 2" >> $M0/f2
+EXPECT "000000000000000000000001" get_hex_xattr trusted.afr.$V0-client-1 $B0/${V0}0
+EXPECT "000000000000000000000001" get_hex_xattr trusted.afr.$V0-client-2 $B0/${V0}0
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 2
+
+#Kill bricks 2 & 0 and create a file to have pending entry for 2 & 0 on brick 1
+TEST kill_brick $V0 $H0 $B0/${V0}2
+TEST kill_brick $V0 $H0 $B0/${V0}0
+echo "file 3" >> $M0/f3
+EXPECT "000000000000000000000001" get_hex_xattr trusted.afr.$V0-client-0 $B0/${V0}1
+EXPECT "000000000000000000000001" get_hex_xattr trusted.afr.$V0-client-2 $B0/${V0}1
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 2
+
+#Kill brick 0 and turn on the client side heal and do ls to trigger the heal.
+#The pending xattrs on bricks 1 & 2 should have pending entry on brick 0.
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST $CLI volume set $V0 cluster.data-self-heal on
+TEST $CLI volume set $V0 cluster.metadata-self-heal on
+TEST $CLI volume set $V0 cluster.entry-self-heal on
+
+TEST ls $M0
+EXPECT "000000000000000000000001" get_hex_xattr trusted.afr.$V0-client-0 $B0/${V0}1
+EXPECT "000000000000000000000000" get_hex_xattr trusted.afr.$V0-client-2 $B0/${V0}1
+EXPECT "000000000000000000000001" get_hex_xattr trusted.afr.$V0-client-0 $B0/${V0}2
+EXPECT "000000000000000000000000" get_hex_xattr trusted.afr.$V0-client-1 $B0/${V0}2
+
+#Bring back all the bricks and trigger the heal again by doing ls. Now the
+#pending xattrs on all the bricks should be 0.
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
+TEST ls $M0
+
+TEST cat $M0/f1
+TEST cat $M0/f2
+TEST cat $M0/f3
+
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+
+EXPECT "000000000000000000000000" get_hex_xattr trusted.afr.$V0-client-1 $B0/${V0}0
+EXPECT "000000000000000000000000" get_hex_xattr trusted.afr.$V0-client-2 $B0/${V0}0
+EXPECT "000000000000000000000000" get_hex_xattr trusted.afr.$V0-client-0 $B0/${V0}1
+EXPECT "000000000000000000000000" get_hex_xattr trusted.afr.$V0-client-2 $B0/${V0}1
+EXPECT "000000000000000000000000" get_hex_xattr trusted.afr.$V0-client-0 $B0/${V0}2
+EXPECT "000000000000000000000000" get_hex_xattr trusted.afr.$V0-client-1 $B0/${V0}2
+
+#Check whether all the bricks contains all the 3 files.
+EXPECT "3" echo $(ls $B0/${V0}0 | wc -l)
+EXPECT "3" echo $(ls $B0/${V0}1 | wc -l)
+EXPECT "3" echo $(ls $B0/${V0}2 | wc -l)
+
+cleanup;
diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c
index 8b2c3fa..9f6cbcd 100644
--- a/xlators/cluster/afr/src/afr-self-heal-common.c
+++ b/xlators/cluster/afr/src/afr-self-heal-common.c
@@ -205,7 +205,7 @@ afr_selfheal_undo_pending (call_frame_t *frame, xlator_t *this, inode_t *inode,
 				output_matrix[i][j] = 1;
                                 if (type == AFR_ENTRY_TRANSACTION)
                                         full_heal_mtx_out[i][j] = 1;
-			} else {
+			} else if (locked_on[j]) {
 				output_matrix[i][j] = -input_matrix[i][j];
                                 if (type == AFR_ENTRY_TRANSACTION)
                                         full_heal_mtx_out[i][j] = -full_heal_mtx_in[i][j];
-- 
1.8.3.1