Tree - rpms/glusterfs - CentOS Git server

rpms / glusterfs

Blame SOURCES/0410-afr-prevent-spurious-entry-heals-leading-to-gfid-spl.patch

Blob History Raw

		9ae3f9	`From 2b2eb846c49caba13ab92ec66af20292e7780fc1 Mon Sep 17 00:00:00 2001`
		9ae3f9	`From: Ravishankar N <ravishankar@redhat.com>`
		9ae3f9	`Date: Tue, 11 Feb 2020 14:34:48 +0530`
		9ae3f9	`Subject: [PATCH 410/449] afr: prevent spurious entry heals leading to gfid`
		9ae3f9	`split-brain`
		9ae3f9
		9ae3f9	`Problem:`
		9ae3f9	`In a hyperconverged setup with granular-entry-heal enabled, if a file is`
		9ae3f9	`recreated while one of the bricks is down, and an index heal is triggered`
		9ae3f9	`(with the brick still down), entry self-heal was doing a spurious heal`
		9ae3f9	`with just the 2 good bricks. It was doing a post-op leading to removal`
		9ae3f9	`of the filename from .glusterfs/indices/entry-changes as well as`
		9ae3f9	`erroneous setting of afr xattrs on the parent. When the brick came up,`
		9ae3f9	`the xattrs were cleared, resulting in the recreated file not getting`
		9ae3f9	`healed and leading to gfid split-brain and EIO on the mount.`
		9ae3f9
		9ae3f9	`Fix:`
		9ae3f9	`Proceed with entry heal only when shd can connect to all bricks of the replica,`
		9ae3f9	`just like in data and metadata heal.`
		9ae3f9
		9ae3f9	`BUG: 1804164`
		9ae3f9
		9ae3f9	`> Upstream patch:https://review.gluster.org/#/c/glusterfs/+/24109/`
		9ae3f9	`> fixes: bz#1801624`
		9ae3f9	`> Change-Id: I916ae26ad1fabf259bc6362da52d433b7223b17e`
		9ae3f9	`> Signed-off-by: Ravishankar N <ravishankar@redhat.com>`
		9ae3f9
		9ae3f9	`Change-Id: I23f57e543cff1e3f35eb8dbc60a2babfae6838c7`
		9ae3f9	`Signed-off-by: Ravishankar N <ravishankar@redhat.com>`
		9ae3f9	`Reviewed-on: https://code.engineering.redhat.com/gerrit/202395`
		9ae3f9	`Tested-by: RHGS Build Bot <nigelb@redhat.com>`
		9ae3f9	`Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>`
		9ae3f9	`---`
		9ae3f9	`.../bug-1433571-undo-pending-only-on-up-bricks.t | 18 ++-----`
		9ae3f9	`tests/bugs/replicate/bug-1801624-entry-heal.t | 58 ++++++++++++++++++++++`
		9ae3f9	`xlators/cluster/afr/src/afr-common.c | 4 +-`
		9ae3f9	`xlators/cluster/afr/src/afr-self-heal-common.c | 8 +--`
		9ae3f9	`xlators/cluster/afr/src/afr-self-heal-entry.c | 6 +--`
		9ae3f9	`xlators/cluster/afr/src/afr-self-heal-name.c | 2 +-`
		9ae3f9	`xlators/cluster/afr/src/afr-self-heal.h | 2 -`
		9ae3f9	`7 files changed, 69 insertions(+), 29 deletions(-)`
		9ae3f9	`create mode 100644 tests/bugs/replicate/bug-1801624-entry-heal.t`
		9ae3f9
		9ae3f9	`diff --git a/tests/bugs/replicate/bug-1433571-undo-pending-only-on-up-bricks.t b/tests/bugs/replicate/bug-1433571-undo-pending-only-on-up-bricks.t`
		9ae3f9	`index 0767f47..10ce013 100644`
		9ae3f9	`--- a/tests/bugs/replicate/bug-1433571-undo-pending-only-on-up-bricks.t`
		9ae3f9	`+++ b/tests/bugs/replicate/bug-1433571-undo-pending-only-on-up-bricks.t`
		9ae3f9	`@@ -49,25 +49,15 @@ TEST $CLI volume start $V0 force`
		9ae3f9	`EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0`
		9ae3f9	`EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 2`
		9ae3f9
		9ae3f9	`-#Kill brick 0 and turn on the client side heal and do ls to trigger the heal.`
		9ae3f9	`-#The pending xattrs on bricks 1 & 2 should have pending entry on brick 0.`
		9ae3f9	`-TEST kill_brick $V0 $H0 $B0/${V0}0`
		9ae3f9	`+# This test used to kill one brick and check that entry heal does not reset`
		9ae3f9	`+# the pending xattrs for the down brick. Since entry heal now requires all`
		9ae3f9	`+# bricks to be up, that check has been removed from this .t.`
		9ae3f9	`+`
		9ae3f9	`TEST $CLI volume set $V0 cluster.data-self-heal on`
		9ae3f9	`TEST $CLI volume set $V0 cluster.metadata-self-heal on`
		9ae3f9	`TEST $CLI volume set $V0 cluster.entry-self-heal on`
		9ae3f9
		9ae3f9	`TEST ls $M0`
		9ae3f9	`-EXPECT "000000000000000000000001" get_hex_xattr trusted.afr.$V0-client-0 $B0/${V0}1`
		9ae3f9	`-EXPECT "000000000000000000000001" get_hex_xattr trusted.afr.$V0-client-0 $B0/${V0}2`
		9ae3f9	`-EXPECT_WITHIN $HEAL_TIMEOUT "000000000000000000000000" get_hex_xattr trusted.afr.$V0-client-2 $B0/${V0}1`
		9ae3f9	`-EXPECT_WITHIN $HEAL_TIMEOUT "000000000000000000000000" get_hex_xattr trusted.afr.$V0-client-1 $B0/${V0}2`
		9ae3f9	`-`
		9ae3f9	`-#Bring back all the bricks and trigger the heal again by doing ls. Now the`
		9ae3f9	`-#pending xattrs on all the bricks should be 0.`
		9ae3f9	`-TEST $CLI volume start $V0 force`
		9ae3f9	`-EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0`
		9ae3f9	`-TEST ls $M0`
		9ae3f9	`-`
		9ae3f9	`TEST cat $M0/f1`
		9ae3f9	`TEST cat $M0/f2`
		9ae3f9	`TEST cat $M0/f3`
		9ae3f9	`diff --git a/tests/bugs/replicate/bug-1801624-entry-heal.t b/tests/bugs/replicate/bug-1801624-entry-heal.t`
		9ae3f9	`new file mode 100644`
		9ae3f9	`index 0000000..94b4651`
		9ae3f9	`--- /dev/null`
		9ae3f9	`+++ b/tests/bugs/replicate/bug-1801624-entry-heal.t`
		9ae3f9	`@@ -0,0 +1,58 @@`
		9ae3f9	`+#!/bin/bash`
		9ae3f9	`+`
		9ae3f9	`+. $(dirname $0)/../../include.rc`
		9ae3f9	`+. $(dirname $0)/../../volume.rc`
		9ae3f9	`+cleanup;`
		9ae3f9	`+`
		9ae3f9	`+TEST glusterd`
		9ae3f9	`+TEST pidof glusterd`
		9ae3f9	`+TEST $CLI volume create $V0 replica 3 $H0:$B0/brick{0,1,2}`
		9ae3f9	`+TEST $CLI volume set $V0 heal-timeout 5`
		9ae3f9	`+TEST $CLI volume start $V0`
		9ae3f9	`+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0`
		9ae3f9	`+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1`
		9ae3f9	`+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2`
		9ae3f9	`+TEST $CLI volume heal $V0 granular-entry-heal enable`
		9ae3f9	`+`
		9ae3f9	`+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0`
		9ae3f9	`+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0`
		9ae3f9	`+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1`
		9ae3f9	`+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 2`
		9ae3f9	`+echo "Data">$M0/FILE`
		9ae3f9	`+ret=$?`
		9ae3f9	`+TEST [ $ret -eq 0 ]`
		9ae3f9	`+`
		9ae3f9	`+# Re-create the file when a brick is down.`
		9ae3f9	`+TEST kill_brick $V0 $H0 $B0/brick1`
		9ae3f9	`+TEST rm $M0/FILE`
		9ae3f9	`+echo "New Data">$M0/FILE`
		9ae3f9	`+ret=$?`
		9ae3f9	`+TEST [ $ret -eq 0 ]`
		9ae3f9	`+EXPECT_WITHIN $HEAL_TIMEOUT "4" get_pending_heal_count $V0`
		9ae3f9	`+`
		9ae3f9	`+# Launching index heal must not reset parent dir afr xattrs or remove granular entry indices.`
		9ae3f9	`+$CLI volume heal $V0 # CLI will fail but heal is launched anyway.`
		9ae3f9	`+TEST sleep 5 # give index heal a chance to do one run.`
		9ae3f9	`+brick0_pending=$(get_hex_xattr trusted.afr.$V0-client-1 $B0/brick0/)`
		9ae3f9	`+brick2_pending=$(get_hex_xattr trusted.afr.$V0-client-1 $B0/brick2/)`
		9ae3f9	`+TEST [ $brick0_pending -eq "000000000000000000000002" ]`
		9ae3f9	`+TEST [ $brick2_pending -eq "000000000000000000000002" ]`
		9ae3f9	`+EXPECT "FILE" ls $B0/brick0/.glusterfs/indices/entry-changes/00000000-0000-0000-0000-000000000001/`
		9ae3f9	`+EXPECT "FILE" ls $B0/brick2/.glusterfs/indices/entry-changes/00000000-0000-0000-0000-000000000001/`
		9ae3f9	`+`
		9ae3f9	`+TEST $CLI volume start $V0 force`
		9ae3f9	`+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/brick1`
		9ae3f9	`+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1`
		9ae3f9	`+$CLI volume heal $V0`
		9ae3f9	`+EXPECT_WITHIN $HEAL_TIMEOUT "0" get_pending_heal_count $V0`
		9ae3f9	`+`
		9ae3f9	`+# No gfid-split-brain (i.e. EIO) must be seen. Try on fresh mount to avoid cached values.`
		9ae3f9	`+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0`
		9ae3f9	`+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0`
		9ae3f9	`+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0`
		9ae3f9	`+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1`
		9ae3f9	`+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 2`
		9ae3f9	`+TEST cat $M0/FILE`
		9ae3f9	`+`
		9ae3f9	`+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0`
		9ae3f9	`+cleanup;`
		9ae3f9	`diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c`
		9ae3f9	`index 32127c6..5806556 100644`
		9ae3f9	`--- a/xlators/cluster/afr/src/afr-common.c`
		9ae3f9	`+++ b/xlators/cluster/afr/src/afr-common.c`
		9ae3f9	`@@ -6629,7 +6629,7 @@ afr_fav_child_reset_sink_xattrs(void *opaque)`
		9ae3f9	`ret = afr_selfheal_inodelk(heal_frame, this, inode, this->name, 0, 0,`
		9ae3f9	`locked_on);`
		9ae3f9	`{`
		9ae3f9	`- if (ret < AFR_SH_MIN_PARTICIPANTS)`
		9ae3f9	`+ if (ret < priv->child_count)`
		9ae3f9	`goto data_unlock;`
		9ae3f9	`ret = __afr_selfheal_data_prepare(`
		9ae3f9	`heal_frame, this, inode, locked_on, sources, sinks,`
		9ae3f9	`@@ -6646,7 +6646,7 @@ afr_fav_child_reset_sink_xattrs(void *opaque)`
		9ae3f9	`ret = afr_selfheal_inodelk(heal_frame, this, inode, this->name,`
		9ae3f9	`LLONG_MAX - 1, 0, locked_on);`
		9ae3f9	`{`
		9ae3f9	`- if (ret < AFR_SH_MIN_PARTICIPANTS)`
		9ae3f9	`+ if (ret < priv->child_count)`
		9ae3f9	`goto mdata_unlock;`
		9ae3f9	`ret = __afr_selfheal_metadata_prepare(`
		9ae3f9	`heal_frame, this, inode, locked_on, sources, sinks,`
		9ae3f9	`diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c`
		9ae3f9	`index 81ef38a..ce1ea50 100644`
		9ae3f9	`--- a/xlators/cluster/afr/src/afr-self-heal-common.c`
		9ae3f9	`+++ b/xlators/cluster/afr/src/afr-self-heal-common.c`
		9ae3f9	`@@ -1575,7 +1575,6 @@ afr_selfheal_find_direction(call_frame_t *frame, xlator_t *this,`
		9ae3f9	`char *accused = NULL; /* Accused others without any self-accusal */`
		9ae3f9	`char *pending = NULL; /* Have pending operations on others */`
		9ae3f9	`char *self_accused = NULL; /* Accused itself */`
		9ae3f9	`- int min_participants = -1;`
		9ae3f9
		9ae3f9	`priv = this->private;`
		9ae3f9
		9ae3f9	`@@ -1599,12 +1598,7 @@ afr_selfheal_find_direction(call_frame_t frame, xlator_t this,`
		9ae3f9	`}`
		9ae3f9	`}`
		9ae3f9
		9ae3f9	`- if (type == AFR_DATA_TRANSACTION || type == AFR_METADATA_TRANSACTION) {`
		9ae3f9	`- min_participants = priv->child_count;`
		9ae3f9	`- } else {`
		9ae3f9	`- min_participants = AFR_SH_MIN_PARTICIPANTS;`
		9ae3f9	`- }`
		9ae3f9	`- if (afr_success_count(replies, priv->child_count) < min_participants) {`
		9ae3f9	`+ if (afr_success_count(replies, priv->child_count) < priv->child_count) {`
		9ae3f9	`/* Treat this just like locks not being acquired */`
		9ae3f9	`return -ENOTCONN;`
		9ae3f9	`}`
		9ae3f9	`diff --git a/xlators/cluster/afr/src/afr-self-heal-entry.c b/xlators/cluster/afr/src/afr-self-heal-entry.c`
		9ae3f9	`index 3ce882e..40be898 100644`
		9ae3f9	`--- a/xlators/cluster/afr/src/afr-self-heal-entry.c`
		9ae3f9	`+++ b/xlators/cluster/afr/src/afr-self-heal-entry.c`
		9ae3f9	`@@ -597,7 +597,7 @@ afr_selfheal_entry_dirent(call_frame_t *frame, xlator_t *this, fd_t *fd,`
		9ae3f9	`ret = afr_selfheal_entrylk(frame, this, fd->inode, this->name, NULL,`
		9ae3f9	`locked_on);`
		9ae3f9	`{`
		9ae3f9	`- if (ret < AFR_SH_MIN_PARTICIPANTS) {`
		9ae3f9	`+ if (ret < priv->child_count) {`
		9ae3f9	`gf_msg_debug(this->name, 0,`
		9ae3f9	`"%s: Skipping "`
		9ae3f9	`"entry self-heal as only %d sub-volumes "`
		9ae3f9	`@@ -991,7 +991,7 @@ __afr_selfheal_entry(call_frame_t *frame, xlator_t *this, fd_t *fd,`
		9ae3f9	`ret = afr_selfheal_entrylk(frame, this, fd->inode, this->name, NULL,`
		9ae3f9	`data_lock);`
		9ae3f9	`{`
		9ae3f9	`- if (ret < AFR_SH_MIN_PARTICIPANTS) {`
		9ae3f9	`+ if (ret < priv->child_count) {`
		9ae3f9	`gf_msg_debug(this->name, 0,`
		9ae3f9	`"%s: Skipping "`
		9ae3f9	`"entry self-heal as only %d sub-volumes could "`
		9ae3f9	`@@ -1115,7 +1115,7 @@ afr_selfheal_entry(call_frame_t *frame, xlator_t *this, inode_t *inode)`
		9ae3f9	`ret = afr_selfheal_tie_breaker_entrylk(frame, this, inode, priv->sh_domain,`
		9ae3f9	`NULL, locked_on);`
		9ae3f9	`{`
		9ae3f9	`- if (ret < AFR_SH_MIN_PARTICIPANTS) {`
		9ae3f9	`+ if (ret < priv->child_count) {`
		9ae3f9	`gf_msg_debug(this->name, 0,`
		9ae3f9	`"%s: Skipping "`
		9ae3f9	`"entry self-heal as only %d sub-volumes could "`
		9ae3f9	`diff --git a/xlators/cluster/afr/src/afr-self-heal-name.c b/xlators/cluster/afr/src/afr-self-heal-name.c`
		9ae3f9	`index 36640b5..7d4f208 100644`
		9ae3f9	`--- a/xlators/cluster/afr/src/afr-self-heal-name.c`
		9ae3f9	`+++ b/xlators/cluster/afr/src/afr-self-heal-name.c`
		9ae3f9	`@@ -514,7 +514,7 @@ afr_selfheal_name_do(call_frame_t *frame, xlator_t *this, inode_t *parent,`
		9ae3f9	`ret = afr_selfheal_entrylk(frame, this, parent, this->name, bname,`
		9ae3f9	`locked_on);`
		9ae3f9	`{`
		9ae3f9	`- if (ret < AFR_SH_MIN_PARTICIPANTS) {`
		9ae3f9	`+ if (ret < priv->child_count) {`
		9ae3f9	`ret = -ENOTCONN;`
		9ae3f9	`goto unlock;`
		9ae3f9	`}`
		9ae3f9	`diff --git a/xlators/cluster/afr/src/afr-self-heal.h b/xlators/cluster/afr/src/afr-self-heal.h`
		9ae3f9	`index 6555ec5..8234cec 100644`
		9ae3f9	`--- a/xlators/cluster/afr/src/afr-self-heal.h`
		9ae3f9	`+++ b/xlators/cluster/afr/src/afr-self-heal.h`
		9ae3f9	`@@ -11,8 +11,6 @@`
		9ae3f9	`#ifndef _AFR_SELFHEAL_H`
		9ae3f9	`#define _AFR_SELFHEAL_H`
		9ae3f9
		9ae3f9	`-#define AFR_SH_MIN_PARTICIPANTS 2`
		9ae3f9	`-`
		9ae3f9	`/* Perform fop on all UP subvolumes and wait for all callbacks to return */`
		9ae3f9
		9ae3f9	`#define AFR_ONALL(frame, rfn, fop, args...) \`
		9ae3f9	`--`
		9ae3f9	`1.8.3.1`
		9ae3f9

rpms / glusterfs

Source Code

Blame SOURCES/0410-afr-prevent-spurious-entry-heals-leading-to-gfid-spl.patch