|
|
d1681e |
From 8217d00a0a54457961e7ec7d3afb24e953923c7d Mon Sep 17 00:00:00 2001
|
|
|
d1681e |
From: Ashish Pandey <aspandey@redhat.com>
|
|
|
d1681e |
Date: Tue, 13 Mar 2018 14:03:20 +0530
|
|
|
d1681e |
Subject: [PATCH 198/201] cluster/ec: Change default read policy to gfid-hash
|
|
|
d1681e |
|
|
|
d1681e |
Problem:
|
|
|
d1681e |
Whenever we read data from file over NFS, NFS reads
|
|
|
d1681e |
more data than requested and caches it. Based on the
|
|
|
d1681e |
stat information it checks whether the cached/pre-read
|
|
|
d1681e |
data is valid or not.
|
|
|
d1681e |
|
|
|
d1681e |
Consider 4 + 2 EC volume and all the bricks are on
|
|
|
d1681e |
different nodes.
|
|
|
d1681e |
|
|
|
d1681e |
In EC, with round-robin read policy, reads are sent on
|
|
|
d1681e |
different sets of data bricks. This way, it balances the
|
|
|
d1681e |
read fops to go on all the bricks and avoid heating UP
|
|
|
d1681e |
(overloading) same set of bricks.
|
|
|
d1681e |
|
|
|
d1681e |
Due to small differences in clock speed, it is possible
|
|
|
d1681e |
that we get minor differences in atime, mtime or ctime
|
|
|
d1681e |
for different bricks. That might cause a different stat
|
|
|
d1681e |
returned to NFS based on which NFS will discard
|
|
|
d1681e |
cached/pre-read data which is actually not changed and
|
|
|
d1681e |
could be used.
|
|
|
d1681e |
|
|
|
d1681e |
Solution:
|
|
|
d1681e |
Change the default read policy for EC to gfid-hash. That will force
|
|
|
d1681e |
all the reads to go to the same set of bricks.
|
|
|
d1681e |
|
|
|
d1681e |
>Change-Id: I825441cc519e94bf3dc3aa0bd4cb7c6ae6392c84
|
|
|
d1681e |
>BUG: 1554743
|
|
|
d1681e |
>Signed-off-by: Ashish Pandey <aspandey@redhat.com>
|
|
|
d1681e |
|
|
|
d1681e |
upstream patch: https://review.gluster.org/#/c/19703/
|
|
|
d1681e |
|
|
|
d1681e |
Change-Id: I43e95717980ca52c228fdcb7863c58bd4d14151c
|
|
|
d1681e |
BUG: 1559084
|
|
|
d1681e |
Signed-off-by: Ashish Pandey <aspandey@redhat.com>
|
|
|
d1681e |
Reviewed-on: https://code.engineering.redhat.com/gerrit/133746
|
|
|
d1681e |
Tested-by: RHGS Build Bot <nigelb@redhat.com>
|
|
|
d1681e |
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
|
|
|
d1681e |
---
|
|
|
d1681e |
tests/basic/ec/ec-read-policy.t | 7 +++----
|
|
|
d1681e |
xlators/cluster/ec/src/ec.c | 2 +-
|
|
|
d1681e |
2 files changed, 4 insertions(+), 5 deletions(-)
|
|
|
d1681e |
|
|
|
d1681e |
diff --git a/tests/basic/ec/ec-read-policy.t b/tests/basic/ec/ec-read-policy.t
|
|
|
d1681e |
index e4390aa..fe6fe65 100644
|
|
|
d1681e |
--- a/tests/basic/ec/ec-read-policy.t
|
|
|
d1681e |
+++ b/tests/basic/ec/ec-read-policy.t
|
|
|
d1681e |
@@ -20,10 +20,9 @@ TEST $CLI volume start $V0
|
|
|
d1681e |
TEST glusterfs --direct-io-mode=yes --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 $M0
|
|
|
d1681e |
EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0
|
|
|
d1681e |
#TEST volume operations work fine
|
|
|
d1681e |
-EXPECT "round-robin" mount_get_option_value $M0 $V0-disperse-0 read-policy
|
|
|
d1681e |
-TEST $CLI volume set $V0 disperse.read-policy gfid-hash
|
|
|
d1681e |
-EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "gfid-hash" mount_get_option_value $M0 $V0-disperse-0 read-policy
|
|
|
d1681e |
-TEST $CLI volume reset $V0 disperse.read-policy
|
|
|
d1681e |
+
|
|
|
d1681e |
+EXPECT "gfid-hash" mount_get_option_value $M0 $V0-disperse-0 read-policy
|
|
|
d1681e |
+TEST $CLI volume set $V0 disperse.read-policy round-robin
|
|
|
d1681e |
EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "round-robin" mount_get_option_value $M0 $V0-disperse-0 read-policy
|
|
|
d1681e |
|
|
|
d1681e |
#TEST if the option gives the intended behavior. The way we perform this test
|
|
|
d1681e |
diff --git a/xlators/cluster/ec/src/ec.c b/xlators/cluster/ec/src/ec.c
|
|
|
d1681e |
index 13ce7fb..bfdca64 100644
|
|
|
d1681e |
--- a/xlators/cluster/ec/src/ec.c
|
|
|
d1681e |
+++ b/xlators/cluster/ec/src/ec.c
|
|
|
d1681e |
@@ -1447,7 +1447,7 @@ struct volume_options options[] =
|
|
|
d1681e |
{ .key = {"read-policy" },
|
|
|
d1681e |
.type = GF_OPTION_TYPE_STR,
|
|
|
d1681e |
.value = {"round-robin", "gfid-hash"},
|
|
|
d1681e |
- .default_value = "round-robin",
|
|
|
d1681e |
+ .default_value = "gfid-hash",
|
|
|
d1681e |
.description = "inode-read fops happen only on 'k' number of bricks in"
|
|
|
d1681e |
" n=k+m disperse subvolume. 'round-robin' selects the read"
|
|
|
d1681e |
" subvolume using round-robin algo. 'gfid-hash' selects read"
|
|
|
d1681e |
--
|
|
|
d1681e |
1.8.3.1
|
|
|
d1681e |
|