From 4e4c3c581daeeb63034bfc89dd4b7a9bcb23e1d2 Mon Sep 17 00:00:00 2001
From: Krutika Dhananjay <kdhananj@redhat.com>
Date: Mon, 26 Dec 2016 21:08:03 +0530
Subject: [PATCH 259/267] cluster/afr: Fix missing name indices due to EEXIST
 error

Backport of: http://review.gluster.org/16286

PROBLEM:
Consider a volume with granular-entry-heal and sharding enabled. When
a replica is down and a shard is created as part of a write, the name
index is correctly created under indices/entry-changes/<dot-shard-gfid>.
Now when a read on the same region triggers another MKNOD, the fop
fails on the online bricks with EEXIST. By virtue of this being a
symmetric error, the failed_subvols[] array is reset to all zeroes.
Because of this, before post-op, the GF_XATTROP_ENTRY_OUT_KEY will be
set, causing the name index, which was created in the previous MKNOD
operation, to be wrongly deleted in THIS MKNOD operation.

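The following is a minimal illustrative sketch (hypothetical, simplified
stand-ins for AFR's real structures and helpers, not the actual code) of
how a symmetric EEXIST hides the earlier partial failure:

/* Illustrative sketch only -- hypothetical, simplified stand-ins for
 * AFR's real structures and helpers, not the actual implementation. */
#include <string.h>

#define SKETCH_CHILD_COUNT 3

struct sketch_local {
        unsigned char up[SKETCH_CHILD_COUNT];              /* brick online?      */
        int           op_ret;                              /* overall fop result */
        int           op_errno[SKETCH_CHILD_COUNT];        /* per-brick errno    */
        unsigned char failed_subvols[SKETCH_CHILD_COUNT];  /* per-brick failures */
};

/* "Symmetric" error: every online brick failed in the same way. */
static int
sketch_error_is_symmetric (struct sketch_local *local)
{
        int first = -1;

        for (int i = 0; i < SKETCH_CHILD_COUNT; i++) {
                if (!local->up[i])
                        continue;                  /* down bricks don't count */
                if (first == -1)
                        first = local->op_errno[i];
                else if (local->op_errno[i] != first)
                        return 0;
        }
        return 1;
}

static void
sketch_second_mknod_result (struct sketch_local *local)
{
        /* Second MKNOD: both online bricks return EEXIST, so the error is
         * symmetric and the per-brick failure markers get wiped ...       */
        if (local->op_ret < 0 && sketch_error_is_symmetric (local))
                memset (local->failed_subvols, 0,
                        sizeof (local->failed_subvols));
        /* ... post-op then sees "no failed subvolumes", sets the ENTRY_OUT
         * key, and the name index created by the first MKNOD is deleted.  */
}
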
FIX:
The ideal fix would have been for a transaction to delete the name
index ONLY if it knows it is the one that created the index in the
first place. That would involve gathering from the individual bricks
whether THIS xattrop created the index, aggregating their responses
and, based on the various possible combinations of responses, deciding
whether or not to delete the index. This is rather complex. The
simpler fix is for post-op to also examine local->op_ret, in the event
of no failed subvolumes, to decide whether to delete the name index.
This can occasionally leave behind stale name indices, but they
neither affect the IO path nor interfere with pending changelogs in
any way, and self-heal deletes such indices during its crawl of the
"entry-changes" directory.

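Restated as a hypothetical helper (not part of the change itself; the
authoritative code is the afr-transaction.c hunk at the end of this
patch), the post-op decision becomes:

/* Hypothetical helper restating the decision taken below in
 * afr_changelog_populate_xdata(): set GF_XATTROP_ENTRY_OUT_KEY, and
 * thereby delete the name index, only on a clean success. */
static int
sketch_should_set_entry_out_key (int op_ret,
                                 const unsigned char *failed_subvols,
                                 int child_count)
{
        for (int i = 0; i < child_count; i++)
                if (failed_subvols[i])
                        return 0;  /* asymmetric failure: index still needed */
        if (op_ret)
                return 0;          /* symmetric failure (e.g. EEXIST): an
                                    * earlier txn may have created the index */
        return 1;                  /* clean success: safe to delete it       */
}
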
Change-Id: I4e7717d66ed7d5edb942ecd28ab765b65d90ed98
BUG: 1408426
Signed-off-by: Krutika Dhananjay <kdhananj@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/93754
Reviewed-by: Pranith Kumar Karampuri <pkarampu@redhat.com>
Tested-by: Pranith Kumar Karampuri <pkarampu@redhat.com>
---
 tests/bugs/replicate/bug-1408712.t        | 87 +++++++++++++++++++++++++++++++
 tests/include.rc                          |  1 +
 xlators/cluster/afr/src/afr-transaction.c | 10 ++++
 3 files changed, 98 insertions(+)
 create mode 100644 tests/bugs/replicate/bug-1408712.t

diff --git a/tests/bugs/replicate/bug-1408712.t b/tests/bugs/replicate/bug-1408712.t
new file mode 100644
index 0000000..b26e8a0
--- /dev/null
+++ b/tests/bugs/replicate/bug-1408712.t
@@ -0,0 +1,87 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../afr.rc
+
+cleanup
+
+TESTS_EXPECTED_IN_LOOP=12
+
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}
+TEST $CLI volume start $V0
+TEST $CLI volume set $V0 features.shard on
+TEST $CLI volume heal $V0 granular-entry-heal enable
+TEST $CLI volume set $V0 cluster.data-self-heal off
+TEST $CLI volume set $V0 cluster.metadata-self-heal off
+TEST $CLI volume set $V0 cluster.entry-self-heal off
+TEST $CLI volume set $V0 self-heal-daemon off
+TEST $CLI volume set $V0 performance.flush-behind off
+
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M1
+
+cd $M0
+TEST dd if=/dev/zero of=file bs=1M count=8
+
+# Kill brick-0.
+TEST kill_brick $V0 $H0 $B0/${V0}0
+
+TEST "dd if=/dev/zero bs=1M count=8 >> file"
+
+FILE_GFID=$(get_gfid_string $M0/file)
+
+# Test that the index associated with '/.shard' is created on B1 and B2.
+TEST stat $B0/${V0}1/.glusterfs/indices/entry-changes/$DOT_SHARD_GFID
+TEST stat $B0/${V0}2/.glusterfs/indices/entry-changes/$DOT_SHARD_GFID
+# Check for successful creation of granular entry indices
+for i in {2..3}
+do
+        TEST_IN_LOOP stat $B0/${V0}1/.glusterfs/indices/entry-changes/$DOT_SHARD_GFID/$FILE_GFID.$i
+        TEST_IN_LOOP stat $B0/${V0}2/.glusterfs/indices/entry-changes/$DOT_SHARD_GFID/$FILE_GFID.$i
+done
+
+cd ~
+TEST md5sum $M1/file
+
+# Test that the index associated with '/.shard' and the created shards do not disappear on B1 and B2.
+TEST stat $B0/${V0}1/.glusterfs/indices/entry-changes/$DOT_SHARD_GFID
+TEST stat $B0/${V0}2/.glusterfs/indices/entry-changes/$DOT_SHARD_GFID
+for i in {2..3}
+do
+        TEST_IN_LOOP stat $B0/${V0}1/.glusterfs/indices/entry-changes/$DOT_SHARD_GFID/$FILE_GFID.$i
+        TEST_IN_LOOP stat $B0/${V0}2/.glusterfs/indices/entry-changes/$DOT_SHARD_GFID/$FILE_GFID.$i
+done
+
+# Start the brick that was down
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 2
+
+# Enable shd
+TEST gluster volume set $V0 cluster.self-heal-daemon on
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2
+
+TEST $CLI volume heal $V0
+
+# Wait for heal to complete
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+
+# Now verify that there are no name indices left after self-heal
+TEST ! stat $B0/${V0}1/.glusterfs/indices/entry-changes/$DOT_SHARD_GFID
+TEST ! stat $B0/${V0}2/.glusterfs/indices/entry-changes/$DOT_SHARD_GFID
+
+for i in {2..3}
+do
+        TEST_IN_LOOP ! stat $B0/${V0}1/.glusterfs/indices/entry-changes/$DOT_SHARD_GFID/$FILE_GFID.$i
+        TEST_IN_LOOP ! stat $B0/${V0}2/.glusterfs/indices/entry-changes/$DOT_SHARD_GFID/$FILE_GFID.$i
+done
+
+cleanup
diff --git a/tests/include.rc b/tests/include.rc
index 44f5798..954fb42 100644
--- a/tests/include.rc
+++ b/tests/include.rc
@@ -11,6 +11,7 @@ B0=${B0:=/d/backends}; # top level of brick directories
 WORKDIRS="$B0 $M0 $M1 $M2 $N0 $N1"
 
 ROOT_GFID="00000000-0000-0000-0000-000000000001"
+DOT_SHARD_GFID="be318638-e8a0-4c6d-977d-7a937aa84806"
 
 META_VOL=${META_VOL:=gluster_shared_storage}; # shared gluster storage volume used by snapshot scheduler, nfs ganesha and geo-rep.
 META_MNT=${META_MNT:=/var/run/gluster/shared_storage}; # Mount point of shared gluster volume.
diff --git a/xlators/cluster/afr/src/afr-transaction.c b/xlators/cluster/afr/src/afr-transaction.c
index 9a3b2e9..1e70c1c 100644
--- a/xlators/cluster/afr/src/afr-transaction.c
+++ b/xlators/cluster/afr/src/afr-transaction.c
@@ -1206,6 +1206,16 @@ afr_changelog_populate_xdata (call_frame_t *frame, afr_xattrop_type_t op,
                                 need_entry_key_set = _gf_false;
                                 break;
                         }
+                        /* If the transaction itself did not fail and there
+                         * are no failed subvolumes, check whether the fop
+                         * failed due to a symmetric error. If it did, do
+                         * not set the ENTRY_OUT xattr which would end up
+                         * deleting a name index which was created possibly by
+                         * an earlier entry txn that may have failed on some
+                         * of the sub-volumes.
+                         */
+                        if (local->op_ret)
+                                need_entry_key_set = _gf_false;
                 } else {
                         key = GF_XATTROP_ENTRY_IN_KEY;
                 }
--
2.9.3