887953
From 68b0db385ce968547349b187222b9a9401faee12 Mon Sep 17 00:00:00 2001
887953
From: Pranith Kumar K <pkarampu@redhat.com>
887953
Date: Mon, 27 Aug 2018 11:46:33 +0530
887953
Subject: [PATCH 424/444] cluster/afr: Delegate metadata heal with pending
887953
 xattrs to SHD
887953
887953
Problem:
887953
When metadata-self-heal is triggered on the mount, it blocks
887953
lookup until metadata-self-heal completes. But that can lead
887953
to hangs when lot of clients are accessing a directory which
887953
needs metadata heal and all of them trigger heals waiting
887953
for other clients to complete heal.
887953
887953
Fix:
887953
Only when the heal is needed but the pending xattrs are not set,
887953
trigger metadata heal that could block lookup. This is the only
887953
case where different clients may give different metadata to the
887953
clients without heals, which should be avoided.
887953
887953
BUG: 1619357
887953
Upstream Patch: https://review.gluster.org/c/glusterfs/+/21086
887953
Change-Id: I6089e9fda0770a83fb287941b229c882711f4e66
887953
Signed-off-by: Pranith Kumar K <pkarampu@redhat.com>
887953
Reviewed-on: https://code.engineering.redhat.com/gerrit/155028
887953
Tested-by: RHGS Build Bot <nigelb@redhat.com>
887953
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
887953
---
887953
 tests/basic/afr/client-side-heal.t             | 28 ++++++++++------
887953
 tests/bugs/glusterfs/bug-906646.t              | 10 ++++--
887953
 xlators/cluster/afr/src/afr-common.c           | 44 ++++++++++++++++++++++++++
887953
 xlators/cluster/afr/src/afr-self-heal-common.c | 38 ----------------------
887953
 xlators/cluster/afr/src/afr.h                  |  3 ++
887953
 5 files changed, 72 insertions(+), 51 deletions(-)
887953
887953
diff --git a/tests/basic/afr/client-side-heal.t b/tests/basic/afr/client-side-heal.t
887953
index eba7dc2..1e93361 100755
887953
--- a/tests/basic/afr/client-side-heal.t
887953
+++ b/tests/basic/afr/client-side-heal.t
887953
@@ -17,6 +17,7 @@ TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0;
887953
 echo "some data" > $M0/datafile
887953
 EXPECT 0 echo $?
887953
 TEST touch $M0/mdatafile
887953
+TEST touch $M0/mdatafile-backend-direct-modify
887953
 TEST mkdir $M0/dir
887953
 
887953
 #Kill a brick and perform I/O to have pending heals.
887953
@@ -29,6 +30,7 @@ EXPECT 0 echo $?
887953
 
887953
 #pending metadata heal
887953
 TEST chmod +x $M0/mdatafile
887953
+TEST chmod +x $B0/${V0}0/mdatafile-backend-direct-modify
887953
 
887953
 #pending entry heal. Also causes pending metadata/data heals on file{1..5}
887953
 TEST touch $M0/dir/file{1..5}
887953
@@ -40,9 +42,12 @@ TEST $CLI volume start $V0 force
887953
 EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
887953
 
887953
 #Medatada heal via explicit lookup must not happen
887953
-TEST ls $M0/mdatafile
887953
+TEST getfattr -d -m. -e hex $M0/mdatafile
887953
+TEST ls $M0/mdatafile-backend-direct-modify
887953
 
887953
-#Inode refresh must not trigger data and entry heals.
887953
+TEST [[ "$(stat -c %A $B0/${V0}0/mdatafile-backend-direct-modify)" != "$(stat -c %A $B0/${V0}1/mdatafile-backend-direct-modify)" ]]
887953
+
887953
+#Inode refresh must not trigger data metadata and entry heals.
887953
 #To trigger inode refresh for sure, the volume is unmounted and mounted each time.
887953
 #Check that data heal does not happen.
887953
 EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
887953
@@ -52,7 +57,6 @@ TEST cat $M0/datafile
887953
 EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
887953
 TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0;
887953
 TEST ls $M0/dir
887953
-
887953
 #No heal must have happened
887953
 EXPECT 8 get_pending_heal_count $V0
887953
 
887953
@@ -61,21 +65,25 @@ TEST $CLI volume set $V0 cluster.data-self-heal on
887953
 TEST $CLI volume set $V0 cluster.metadata-self-heal on
887953
 TEST $CLI volume set $V0 cluster.entry-self-heal on
887953
 
887953
-#Metadata heal is triggered by lookup without need for inode refresh.
887953
-TEST ls $M0/mdatafile
887953
-EXPECT 7 get_pending_heal_count $V0
887953
-
887953
-#Inode refresh must trigger data and entry heals.
887953
+#Inode refresh must trigger data metadata and entry heals.
887953
 #To trigger inode refresh for sure, the volume is unmounted and mounted each time.
887953
 EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
887953
 TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0;
887953
+TEST ls $M0/mdatafile-backend-direct-modify
887953
+
887953
+TEST [[ "$(stat -c %A $B0/${V0}0/mdatafile-backend-direct-modify)" == "$(stat -c %A $B0/${V0}1/mdatafile-backend-direct-modify)" ]]
887953
+
887953
+
887953
+TEST getfattr -d -m. -e hex $M0/mdatafile
887953
+EXPECT_WITHIN $HEAL_TIMEOUT 7 get_pending_heal_count $V0
887953
+
887953
 TEST cat $M0/datafile
887953
 EXPECT_WITHIN $HEAL_TIMEOUT 6 get_pending_heal_count $V0
887953
 
887953
 EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
887953
 TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0;
887953
 TEST ls $M0/dir
887953
-EXPECT 5 get_pending_heal_count $V0
887953
+EXPECT_WITHIN $HEAL_TIMEOUT 5 get_pending_heal_count $V0
887953
 
887953
 TEST cat  $M0/dir/file1
887953
 TEST cat  $M0/dir/file2
887953
@@ -83,5 +91,5 @@ TEST cat  $M0/dir/file3
887953
 TEST cat  $M0/dir/file4
887953
 TEST cat  $M0/dir/file5
887953
 
887953
-EXPECT 0 get_pending_heal_count $V0
887953
+EXPECT_WITHIN $HEAL_TIMEOUT 0 get_pending_heal_count $V0
887953
 cleanup;
887953
diff --git a/tests/bugs/glusterfs/bug-906646.t b/tests/bugs/glusterfs/bug-906646.t
887953
index 45c85d9..37b8fe5 100644
887953
--- a/tests/bugs/glusterfs/bug-906646.t
887953
+++ b/tests/bugs/glusterfs/bug-906646.t
887953
@@ -13,7 +13,6 @@ TEST pidof glusterd
887953
 TEST $CLI volume create $V0 replica $REPLICA $H0:$B0/${V0}-00 $H0:$B0/${V0}-01 $H0:$B0/${V0}-10 $H0:$B0/${V0}-11
887953
 TEST $CLI volume start $V0
887953
 
887953
-TEST $CLI volume set $V0 cluster.self-heal-daemon off
887953
 TEST $CLI volume set $V0 cluster.background-self-heal-count 0
887953
 
887953
 ## Mount FUSE with caching disabled
887953
@@ -82,10 +81,15 @@ EXPECT 1 xattr_query_check ${backend_paths_array[1]} "trusted.name"
887953
 # restart the brick process
887953
 TEST $CLI volume start $V0 force
887953
 
887953
-EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 `expr $brick_id - 1`
887953
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
887953
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
887953
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
887953
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2
887953
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 3
887953
 
887953
-cat $pth >/dev/null
887953
+TEST $CLI volume heal $V0
887953
 
887953
+EXPECT_WITHIN $HEAL_TIMEOUT "0" get_pending_heal_count $V0
887953
 # check backends - xattr should not be present anywhere
887953
 EXPECT 1 xattr_query_check ${backend_paths_array[0]} "trusted.name"
887953
 EXPECT 1 xattr_query_check ${backend_paths_array[1]} "trusted.name"
887953
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
887953
index e8107c9..e74fdec 100644
887953
--- a/xlators/cluster/afr/src/afr-common.c
887953
+++ b/xlators/cluster/afr/src/afr-common.c
887953
@@ -2571,6 +2571,42 @@ out:
887953
         return 0;
887953
 }
887953
 
887953
+gf_boolean_t
887953
+afr_is_pending_set (xlator_t *this, dict_t *xdata, int type)
887953
+{
887953
+        int idx = -1;
887953
+        afr_private_t *priv = NULL;
887953
+        void *pending_raw = NULL;
887953
+        int *pending_int = NULL;
887953
+        int i = 0;
887953
+
887953
+        priv = this->private;
887953
+        idx = afr_index_for_transaction_type (type);
887953
+
887953
+        if (dict_get_ptr (xdata, AFR_DIRTY, &pending_raw) == 0) {
887953
+                if (pending_raw) {
887953
+                        pending_int = pending_raw;
887953
+
887953
+                        if (ntoh32 (pending_int[idx]))
887953
+                                return _gf_true;
887953
+                }
887953
+        }
887953
+
887953
+        for (i = 0; i < priv->child_count; i++) {
887953
+                if (dict_get_ptr (xdata, priv->pending_key[i],
887953
+                                  &pending_raw))
887953
+                        continue;
887953
+                if (!pending_raw)
887953
+                        continue;
887953
+                pending_int = pending_raw;
887953
+
887953
+                if (ntoh32 (pending_int[idx]))
887953
+                        return _gf_true;
887953
+        }
887953
+
887953
+        return _gf_false;
887953
+}
887953
+
887953
 static gf_boolean_t
887953
 afr_can_start_metadata_self_heal(call_frame_t *frame, xlator_t *this)
887953
 {
887953
@@ -2597,6 +2633,14 @@ afr_can_start_metadata_self_heal(call_frame_t *frame, xlator_t *this)
887953
                         continue;
887953
                 }
887953
 
887953
+                if (afr_is_pending_set (this, replies[i].xdata,
887953
+                                        AFR_METADATA_TRANSACTION)) {
887953
+                        /* Let shd do the heal so that lookup is not blocked
887953
+                         * on getting metadata lock/doing the heal */
887953
+                        start = _gf_false;
887953
+                        break;
887953
+                }
887953
+
887953
                 if (gf_uuid_compare (stbuf.ia_gfid, replies[i].poststat.ia_gfid)) {
887953
                         start = _gf_false;
887953
                         break;
887953
diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c
887953
index d04f11d..c6ee75b 100644
887953
--- a/xlators/cluster/afr/src/afr-self-heal-common.c
887953
+++ b/xlators/cluster/afr/src/afr-self-heal-common.c
887953
@@ -2182,44 +2182,6 @@ afr_selfheal_unentrylk (call_frame_t *frame, xlator_t *this, inode_t *inode,
887953
 	return 0;
887953
 }
887953
 
887953
-
887953
-gf_boolean_t
887953
-afr_is_pending_set (xlator_t *this, dict_t *xdata, int type)
887953
-{
887953
-	int idx = -1;
887953
-	afr_private_t *priv = NULL;
887953
-	void *pending_raw = NULL;
887953
-	int *pending_int = NULL;
887953
-	int i = 0;
887953
-
887953
-	priv = this->private;
887953
-	idx = afr_index_for_transaction_type (type);
887953
-
887953
-	if (dict_get_ptr (xdata, AFR_DIRTY, &pending_raw) == 0) {
887953
-		if (pending_raw) {
887953
-			pending_int = pending_raw;
887953
-
887953
-			if (ntoh32 (pending_int[idx]))
887953
-				return _gf_true;
887953
-		}
887953
-	}
887953
-
887953
-	for (i = 0; i < priv->child_count; i++) {
887953
-		if (dict_get_ptr (xdata, priv->pending_key[i],
887953
-				  &pending_raw))
887953
-			continue;
887953
-		if (!pending_raw)
887953
-			continue;
887953
-		pending_int = pending_raw;
887953
-
887953
-		if (ntoh32 (pending_int[idx]))
887953
-			return _gf_true;
887953
-	}
887953
-
887953
-	return _gf_false;
887953
-}
887953
-
887953
-
887953
 gf_boolean_t
887953
 afr_is_data_set (xlator_t *this, dict_t *xdata)
887953
 {
887953
diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h
887953
index 2e6d995..af9dbc8 100644
887953
--- a/xlators/cluster/afr/src/afr.h
887953
+++ b/xlators/cluster/afr/src/afr.h
887953
@@ -1225,4 +1225,7 @@ afr_set_inode_local (xlator_t *this, afr_local_t *local, inode_t *inode);
887953
 
887953
 gf_boolean_t
887953
 afr_is_symmetric_error (call_frame_t *frame, xlator_t *this);
887953
+
887953
+gf_boolean_t
887953
+afr_is_pending_set (xlator_t *this, dict_t *xdata, int type);
887953
 #endif /* __AFR_H__ */
887953
-- 
887953
1.8.3.1
887953