|
|
887953 |
From 68b0db385ce968547349b187222b9a9401faee12 Mon Sep 17 00:00:00 2001
|
|
|
887953 |
From: Pranith Kumar K <pkarampu@redhat.com>
|
|
|
887953 |
Date: Mon, 27 Aug 2018 11:46:33 +0530
|
|
|
887953 |
Subject: [PATCH 424/444] cluster/afr: Delegate metadata heal with pending
|
|
|
887953 |
xattrs to SHD
|
|
|
887953 |
|
|
|
887953 |
Problem:
|
|
|
887953 |
When metadata-self-heal is triggered on the mount, it blocks
|
|
|
887953 |
lookup until metadata-self-heal completes. But that can lead
|
|
|
887953 |
to hangs when lot of clients are accessing a directory which
|
|
|
887953 |
needs metadata heal and all of them trigger heals waiting
|
|
|
887953 |
for other clients to complete heal.
|
|
|
887953 |
|
|
|
887953 |
Fix:
|
|
|
887953 |
Only when the heal is needed but the pending xattrs are not set,
|
|
|
887953 |
trigger metadata heal that could block lookup. This is the only
|
|
|
887953 |
case where different clients may give different metadata to the
|
|
|
887953 |
clients without heals, which should be avoided.
|
|
|
887953 |
|
|
|
887953 |
BUG: 1619357
|
|
|
887953 |
Upstream Patch: https://review.gluster.org/c/glusterfs/+/21086
|
|
|
887953 |
Change-Id: I6089e9fda0770a83fb287941b229c882711f4e66
|
|
|
887953 |
Signed-off-by: Pranith Kumar K <pkarampu@redhat.com>
|
|
|
887953 |
Reviewed-on: https://code.engineering.redhat.com/gerrit/155028
|
|
|
887953 |
Tested-by: RHGS Build Bot <nigelb@redhat.com>
|
|
|
887953 |
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
|
|
|
887953 |
---
|
|
|
887953 |
tests/basic/afr/client-side-heal.t | 28 ++++++++++------
|
|
|
887953 |
tests/bugs/glusterfs/bug-906646.t | 10 ++++--
|
|
|
887953 |
xlators/cluster/afr/src/afr-common.c | 44 ++++++++++++++++++++++++++
|
|
|
887953 |
xlators/cluster/afr/src/afr-self-heal-common.c | 38 ----------------------
|
|
|
887953 |
xlators/cluster/afr/src/afr.h | 3 ++
|
|
|
887953 |
5 files changed, 72 insertions(+), 51 deletions(-)
|
|
|
887953 |
|
|
|
887953 |
diff --git a/tests/basic/afr/client-side-heal.t b/tests/basic/afr/client-side-heal.t
|
|
|
887953 |
index eba7dc2..1e93361 100755
|
|
|
887953 |
--- a/tests/basic/afr/client-side-heal.t
|
|
|
887953 |
+++ b/tests/basic/afr/client-side-heal.t
|
|
|
887953 |
@@ -17,6 +17,7 @@ TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0;
|
|
|
887953 |
echo "some data" > $M0/datafile
|
|
|
887953 |
EXPECT 0 echo $?
|
|
|
887953 |
TEST touch $M0/mdatafile
|
|
|
887953 |
+TEST touch $M0/mdatafile-backend-direct-modify
|
|
|
887953 |
TEST mkdir $M0/dir
|
|
|
887953 |
|
|
|
887953 |
#Kill a brick and perform I/O to have pending heals.
|
|
|
887953 |
@@ -29,6 +30,7 @@ EXPECT 0 echo $?
|
|
|
887953 |
|
|
|
887953 |
#pending metadata heal
|
|
|
887953 |
TEST chmod +x $M0/mdatafile
|
|
|
887953 |
+TEST chmod +x $B0/${V0}0/mdatafile-backend-direct-modify
|
|
|
887953 |
|
|
|
887953 |
#pending entry heal. Also causes pending metadata/data heals on file{1..5}
|
|
|
887953 |
TEST touch $M0/dir/file{1..5}
|
|
|
887953 |
@@ -40,9 +42,12 @@ TEST $CLI volume start $V0 force
|
|
|
887953 |
EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
|
|
|
887953 |
|
|
|
887953 |
#Medatada heal via explicit lookup must not happen
|
|
|
887953 |
-TEST ls $M0/mdatafile
|
|
|
887953 |
+TEST getfattr -d -m. -e hex $M0/mdatafile
|
|
|
887953 |
+TEST ls $M0/mdatafile-backend-direct-modify
|
|
|
887953 |
|
|
|
887953 |
-#Inode refresh must not trigger data and entry heals.
|
|
|
887953 |
+TEST [[ "$(stat -c %A $B0/${V0}0/mdatafile-backend-direct-modify)" != "$(stat -c %A $B0/${V0}1/mdatafile-backend-direct-modify)" ]]
|
|
|
887953 |
+
|
|
|
887953 |
+#Inode refresh must not trigger data metadata and entry heals.
|
|
|
887953 |
#To trigger inode refresh for sure, the volume is unmounted and mounted each time.
|
|
|
887953 |
#Check that data heal does not happen.
|
|
|
887953 |
EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
|
|
|
887953 |
@@ -52,7 +57,6 @@ TEST cat $M0/datafile
|
|
|
887953 |
EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
|
|
|
887953 |
TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0;
|
|
|
887953 |
TEST ls $M0/dir
|
|
|
887953 |
-
|
|
|
887953 |
#No heal must have happened
|
|
|
887953 |
EXPECT 8 get_pending_heal_count $V0
|
|
|
887953 |
|
|
|
887953 |
@@ -61,21 +65,25 @@ TEST $CLI volume set $V0 cluster.data-self-heal on
|
|
|
887953 |
TEST $CLI volume set $V0 cluster.metadata-self-heal on
|
|
|
887953 |
TEST $CLI volume set $V0 cluster.entry-self-heal on
|
|
|
887953 |
|
|
|
887953 |
-#Metadata heal is triggered by lookup without need for inode refresh.
|
|
|
887953 |
-TEST ls $M0/mdatafile
|
|
|
887953 |
-EXPECT 7 get_pending_heal_count $V0
|
|
|
887953 |
-
|
|
|
887953 |
-#Inode refresh must trigger data and entry heals.
|
|
|
887953 |
+#Inode refresh must trigger data metadata and entry heals.
|
|
|
887953 |
#To trigger inode refresh for sure, the volume is unmounted and mounted each time.
|
|
|
887953 |
EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
|
|
|
887953 |
TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0;
|
|
|
887953 |
+TEST ls $M0/mdatafile-backend-direct-modify
|
|
|
887953 |
+
|
|
|
887953 |
+TEST [[ "$(stat -c %A $B0/${V0}0/mdatafile-backend-direct-modify)" == "$(stat -c %A $B0/${V0}1/mdatafile-backend-direct-modify)" ]]
|
|
|
887953 |
+
|
|
|
887953 |
+
|
|
|
887953 |
+TEST getfattr -d -m. -e hex $M0/mdatafile
|
|
|
887953 |
+EXPECT_WITHIN $HEAL_TIMEOUT 7 get_pending_heal_count $V0
|
|
|
887953 |
+
|
|
|
887953 |
TEST cat $M0/datafile
|
|
|
887953 |
EXPECT_WITHIN $HEAL_TIMEOUT 6 get_pending_heal_count $V0
|
|
|
887953 |
|
|
|
887953 |
EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
|
|
|
887953 |
TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0;
|
|
|
887953 |
TEST ls $M0/dir
|
|
|
887953 |
-EXPECT 5 get_pending_heal_count $V0
|
|
|
887953 |
+EXPECT_WITHIN $HEAL_TIMEOUT 5 get_pending_heal_count $V0
|
|
|
887953 |
|
|
|
887953 |
TEST cat $M0/dir/file1
|
|
|
887953 |
TEST cat $M0/dir/file2
|
|
|
887953 |
@@ -83,5 +91,5 @@ TEST cat $M0/dir/file3
|
|
|
887953 |
TEST cat $M0/dir/file4
|
|
|
887953 |
TEST cat $M0/dir/file5
|
|
|
887953 |
|
|
|
887953 |
-EXPECT 0 get_pending_heal_count $V0
|
|
|
887953 |
+EXPECT_WITHIN $HEAL_TIMEOUT 0 get_pending_heal_count $V0
|
|
|
887953 |
cleanup;
|
|
|
887953 |
diff --git a/tests/bugs/glusterfs/bug-906646.t b/tests/bugs/glusterfs/bug-906646.t
|
|
|
887953 |
index 45c85d9..37b8fe5 100644
|
|
|
887953 |
--- a/tests/bugs/glusterfs/bug-906646.t
|
|
|
887953 |
+++ b/tests/bugs/glusterfs/bug-906646.t
|
|
|
887953 |
@@ -13,7 +13,6 @@ TEST pidof glusterd
|
|
|
887953 |
TEST $CLI volume create $V0 replica $REPLICA $H0:$B0/${V0}-00 $H0:$B0/${V0}-01 $H0:$B0/${V0}-10 $H0:$B0/${V0}-11
|
|
|
887953 |
TEST $CLI volume start $V0
|
|
|
887953 |
|
|
|
887953 |
-TEST $CLI volume set $V0 cluster.self-heal-daemon off
|
|
|
887953 |
TEST $CLI volume set $V0 cluster.background-self-heal-count 0
|
|
|
887953 |
|
|
|
887953 |
## Mount FUSE with caching disabled
|
|
|
887953 |
@@ -82,10 +81,15 @@ EXPECT 1 xattr_query_check ${backend_paths_array[1]} "trusted.name"
|
|
|
887953 |
# restart the brick process
|
|
|
887953 |
TEST $CLI volume start $V0 force
|
|
|
887953 |
|
|
|
887953 |
-EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 `expr $brick_id - 1`
|
|
|
887953 |
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
|
|
|
887953 |
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
|
|
|
887953 |
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
|
|
|
887953 |
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2
|
|
|
887953 |
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 3
|
|
|
887953 |
|
|
|
887953 |
-cat $pth >/dev/null
|
|
|
887953 |
+TEST $CLI volume heal $V0
|
|
|
887953 |
|
|
|
887953 |
+EXPECT_WITHIN $HEAL_TIMEOUT "0" get_pending_heal_count $V0
|
|
|
887953 |
# check backends - xattr should not be present anywhere
|
|
|
887953 |
EXPECT 1 xattr_query_check ${backend_paths_array[0]} "trusted.name"
|
|
|
887953 |
EXPECT 1 xattr_query_check ${backend_paths_array[1]} "trusted.name"
|
|
|
887953 |
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
|
|
|
887953 |
index e8107c9..e74fdec 100644
|
|
|
887953 |
--- a/xlators/cluster/afr/src/afr-common.c
|
|
|
887953 |
+++ b/xlators/cluster/afr/src/afr-common.c
|
|
|
887953 |
@@ -2571,6 +2571,42 @@ out:
|
|
|
887953 |
return 0;
|
|
|
887953 |
}
|
|
|
887953 |
|
|
|
887953 |
+gf_boolean_t
|
|
|
887953 |
+afr_is_pending_set (xlator_t *this, dict_t *xdata, int type)
|
|
|
887953 |
+{
|
|
|
887953 |
+ int idx = -1;
|
|
|
887953 |
+ afr_private_t *priv = NULL;
|
|
|
887953 |
+ void *pending_raw = NULL;
|
|
|
887953 |
+ int *pending_int = NULL;
|
|
|
887953 |
+ int i = 0;
|
|
|
887953 |
+
|
|
|
887953 |
+ priv = this->private;
|
|
|
887953 |
+ idx = afr_index_for_transaction_type (type);
|
|
|
887953 |
+
|
|
|
887953 |
+ if (dict_get_ptr (xdata, AFR_DIRTY, &pending_raw) == 0) {
|
|
|
887953 |
+ if (pending_raw) {
|
|
|
887953 |
+ pending_int = pending_raw;
|
|
|
887953 |
+
|
|
|
887953 |
+ if (ntoh32 (pending_int[idx]))
|
|
|
887953 |
+ return _gf_true;
|
|
|
887953 |
+ }
|
|
|
887953 |
+ }
|
|
|
887953 |
+
|
|
|
887953 |
+ for (i = 0; i < priv->child_count; i++) {
|
|
|
887953 |
+ if (dict_get_ptr (xdata, priv->pending_key[i],
|
|
|
887953 |
+ &pending_raw))
|
|
|
887953 |
+ continue;
|
|
|
887953 |
+ if (!pending_raw)
|
|
|
887953 |
+ continue;
|
|
|
887953 |
+ pending_int = pending_raw;
|
|
|
887953 |
+
|
|
|
887953 |
+ if (ntoh32 (pending_int[idx]))
|
|
|
887953 |
+ return _gf_true;
|
|
|
887953 |
+ }
|
|
|
887953 |
+
|
|
|
887953 |
+ return _gf_false;
|
|
|
887953 |
+}
|
|
|
887953 |
+
|
|
|
887953 |
static gf_boolean_t
|
|
|
887953 |
afr_can_start_metadata_self_heal(call_frame_t *frame, xlator_t *this)
|
|
|
887953 |
{
|
|
|
887953 |
@@ -2597,6 +2633,14 @@ afr_can_start_metadata_self_heal(call_frame_t *frame, xlator_t *this)
|
|
|
887953 |
continue;
|
|
|
887953 |
}
|
|
|
887953 |
|
|
|
887953 |
+ if (afr_is_pending_set (this, replies[i].xdata,
|
|
|
887953 |
+ AFR_METADATA_TRANSACTION)) {
|
|
|
887953 |
+ /* Let shd do the heal so that lookup is not blocked
|
|
|
887953 |
+ * on getting metadata lock/doing the heal */
|
|
|
887953 |
+ start = _gf_false;
|
|
|
887953 |
+ break;
|
|
|
887953 |
+ }
|
|
|
887953 |
+
|
|
|
887953 |
if (gf_uuid_compare (stbuf.ia_gfid, replies[i].poststat.ia_gfid)) {
|
|
|
887953 |
start = _gf_false;
|
|
|
887953 |
break;
|
|
|
887953 |
diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c
|
|
|
887953 |
index d04f11d..c6ee75b 100644
|
|
|
887953 |
--- a/xlators/cluster/afr/src/afr-self-heal-common.c
|
|
|
887953 |
+++ b/xlators/cluster/afr/src/afr-self-heal-common.c
|
|
|
887953 |
@@ -2182,44 +2182,6 @@ afr_selfheal_unentrylk (call_frame_t *frame, xlator_t *this, inode_t *inode,
|
|
|
887953 |
return 0;
|
|
|
887953 |
}
|
|
|
887953 |
|
|
|
887953 |
-
|
|
|
887953 |
-gf_boolean_t
|
|
|
887953 |
-afr_is_pending_set (xlator_t *this, dict_t *xdata, int type)
|
|
|
887953 |
-{
|
|
|
887953 |
- int idx = -1;
|
|
|
887953 |
- afr_private_t *priv = NULL;
|
|
|
887953 |
- void *pending_raw = NULL;
|
|
|
887953 |
- int *pending_int = NULL;
|
|
|
887953 |
- int i = 0;
|
|
|
887953 |
-
|
|
|
887953 |
- priv = this->private;
|
|
|
887953 |
- idx = afr_index_for_transaction_type (type);
|
|
|
887953 |
-
|
|
|
887953 |
- if (dict_get_ptr (xdata, AFR_DIRTY, &pending_raw) == 0) {
|
|
|
887953 |
- if (pending_raw) {
|
|
|
887953 |
- pending_int = pending_raw;
|
|
|
887953 |
-
|
|
|
887953 |
- if (ntoh32 (pending_int[idx]))
|
|
|
887953 |
- return _gf_true;
|
|
|
887953 |
- }
|
|
|
887953 |
- }
|
|
|
887953 |
-
|
|
|
887953 |
- for (i = 0; i < priv->child_count; i++) {
|
|
|
887953 |
- if (dict_get_ptr (xdata, priv->pending_key[i],
|
|
|
887953 |
- &pending_raw))
|
|
|
887953 |
- continue;
|
|
|
887953 |
- if (!pending_raw)
|
|
|
887953 |
- continue;
|
|
|
887953 |
- pending_int = pending_raw;
|
|
|
887953 |
-
|
|
|
887953 |
- if (ntoh32 (pending_int[idx]))
|
|
|
887953 |
- return _gf_true;
|
|
|
887953 |
- }
|
|
|
887953 |
-
|
|
|
887953 |
- return _gf_false;
|
|
|
887953 |
-}
|
|
|
887953 |
-
|
|
|
887953 |
-
|
|
|
887953 |
gf_boolean_t
|
|
|
887953 |
afr_is_data_set (xlator_t *this, dict_t *xdata)
|
|
|
887953 |
{
|
|
|
887953 |
diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h
|
|
|
887953 |
index 2e6d995..af9dbc8 100644
|
|
|
887953 |
--- a/xlators/cluster/afr/src/afr.h
|
|
|
887953 |
+++ b/xlators/cluster/afr/src/afr.h
|
|
|
887953 |
@@ -1225,4 +1225,7 @@ afr_set_inode_local (xlator_t *this, afr_local_t *local, inode_t *inode);
|
|
|
887953 |
|
|
|
887953 |
gf_boolean_t
|
|
|
887953 |
afr_is_symmetric_error (call_frame_t *frame, xlator_t *this);
|
|
|
887953 |
+
|
|
|
887953 |
+gf_boolean_t
|
|
|
887953 |
+afr_is_pending_set (xlator_t *this, dict_t *xdata, int type);
|
|
|
887953 |
#endif /* __AFR_H__ */
|
|
|
887953 |
--
|
|
|
887953 |
1.8.3.1
|
|
|
887953 |
|