|
|
887953 |
From 03e4bab925b20832492c9954d3ecb6c10fe56548 Mon Sep 17 00:00:00 2001
|
|
|
887953 |
From: Ravishankar N <ravishankar@redhat.com>
|
|
|
887953 |
Date: Wed, 10 Oct 2018 17:57:33 +0530
|
|
|
887953 |
Subject: [PATCH 403/404] afr: prevent winding inodelks twice for arbiter
|
|
|
887953 |
volumes
|
|
|
887953 |
|
|
|
887953 |
Backport of https://review.gluster.org/#/c/glusterfs/+/21380/
|
|
|
887953 |
|
|
|
887953 |
Problem:
|
|
|
887953 |
In an arbiter volume, if there is a pending data heal of a file only on
|
|
|
887953 |
arbiter brick, self-heal takes inodelks twice due to a code-bug but unlocks
|
|
|
887953 |
it only once, leaving behind a stale lock on the brick. This causes
|
|
|
887953 |
the next write to the file to hang.
|
|
|
887953 |
|
|
|
887953 |
Fix:
|
|
|
887953 |
Fix the code bug to take the lock only once. This bug was introduced in master
|
|
|
887953 |
with commit eb472d82a083883335bc494b87ea175ac43471ff
|
|
|
887953 |
|
|
|
887953 |
Thanks to Pranith Kumar K <pkarampu@redhat.com> for finding the RCA.
|
|
|
887953 |
|
|
|
887953 |
Change-Id: I15ad969e10a6a3c4bd255e2948b6be6dcddc61e1
|
|
|
887953 |
BUG: 1636902
|
|
|
887953 |
Signed-off-by: Ravishankar N <ravishankar@redhat.com>
|
|
|
887953 |
Reviewed-on: https://code.engineering.redhat.com/gerrit/152552
|
|
|
887953 |
Tested-by: RHGS Build Bot <nigelb@redhat.com>
|
|
|
887953 |
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
|
|
|
887953 |
---
|
|
|
887953 |
.../bug-1637802-arbiter-stale-data-heal-lock.t | 44 ++++++++++++++++++++++
|
|
|
887953 |
xlators/cluster/afr/src/afr-self-heal-data.c | 2 +-
|
|
|
887953 |
2 files changed, 45 insertions(+), 1 deletion(-)
|
|
|
887953 |
create mode 100644 tests/bugs/replicate/bug-1637802-arbiter-stale-data-heal-lock.t
|
|
|
887953 |
|
|
|
887953 |
diff --git a/tests/bugs/replicate/bug-1637802-arbiter-stale-data-heal-lock.t b/tests/bugs/replicate/bug-1637802-arbiter-stale-data-heal-lock.t
|
|
|
887953 |
new file mode 100644
|
|
|
887953 |
index 0000000..91ed39b
|
|
|
887953 |
--- /dev/null
|
|
|
887953 |
+++ b/tests/bugs/replicate/bug-1637802-arbiter-stale-data-heal-lock.t
|
|
|
887953 |
@@ -0,0 +1,44 @@
|
|
|
887953 |
+#!/bin/bash
|
|
|
887953 |
+
|
|
|
887953 |
+. $(dirname $0)/../../include.rc
|
|
|
887953 |
+. $(dirname $0)/../../volume.rc
|
|
|
887953 |
+. $(dirname $0)/../../afr.rc
|
|
|
887953 |
+
|
|
|
887953 |
+cleanup;
|
|
|
887953 |
+
|
|
|
887953 |
+# Test to check that data self-heal does not leave any stale lock.
|
|
|
887953 |
+
|
|
|
887953 |
+TEST glusterd;
|
|
|
887953 |
+TEST pidof glusterd;
|
|
|
887953 |
+TEST $CLI volume create $V0 replica 3 arbiter 1 $H0:$B0/${V0}{0,1,2};
|
|
|
887953 |
+TEST $CLI volume start $V0;
|
|
|
887953 |
+EXPECT 'Started' volinfo_field $V0 'Status';
|
|
|
887953 |
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0
|
|
|
887953 |
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1
|
|
|
887953 |
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}2
|
|
|
887953 |
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
|
|
|
887953 |
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
|
|
|
887953 |
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
|
|
|
887953 |
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 2
|
|
|
887953 |
+
|
|
|
887953 |
+# Create base entry in indices/xattrop
|
|
|
887953 |
+echo "Data" > $M0/FILE
|
|
|
887953 |
+
|
|
|
887953 |
+# Kill arbiter brick and write to FILE.
|
|
|
887953 |
+TEST kill_brick $V0 $H0 $B0/${V0}2
|
|
|
887953 |
+echo "arbiter down" >> $M0/FILE
|
|
|
887953 |
+EXPECT 2 get_pending_heal_count $V0
|
|
|
887953 |
+
|
|
|
887953 |
+# Bring it back up and let heal complete.
|
|
|
887953 |
+TEST $CLI volume start $V0 force
|
|
|
887953 |
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}2
|
|
|
887953 |
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
|
|
|
887953 |
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
|
|
|
887953 |
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2
|
|
|
887953 |
+TEST $CLI volume heal $V0
|
|
|
887953 |
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
|
|
|
887953 |
+
|
|
|
887953 |
+# write to the FILE must succeed.
|
|
|
887953 |
+echo "this must succeed" >> $M0/FILE
|
|
|
887953 |
+TEST [ $? -eq 0 ]
|
|
|
887953 |
+cleanup;
|
|
|
887953 |
diff --git a/xlators/cluster/afr/src/afr-self-heal-data.c b/xlators/cluster/afr/src/afr-self-heal-data.c
|
|
|
887953 |
index d3deb8f..2ac6e47 100644
|
|
|
887953 |
--- a/xlators/cluster/afr/src/afr-self-heal-data.c
|
|
|
887953 |
+++ b/xlators/cluster/afr/src/afr-self-heal-data.c
|
|
|
887953 |
@@ -765,7 +765,7 @@ restore_time:
|
|
|
887953 |
afr_selfheal_restore_time (frame, this, fd->inode, source,
|
|
|
887953 |
healed_sinks, locked_replies);
|
|
|
887953 |
|
|
|
887953 |
- if (!is_arbiter_the_only_sink || !empty_file) {
|
|
|
887953 |
+ if (!is_arbiter_the_only_sink && !empty_file) {
|
|
|
887953 |
ret = afr_selfheal_inodelk (frame, this, fd->inode, this->name,
|
|
|
887953 |
0, 0, data_lock);
|
|
|
887953 |
if (ret < priv->child_count) {
|
|
|
887953 |
--
|
|
|
887953 |
1.8.3.1
|
|
|
887953 |
|