d1681e
From 8217d00a0a54457961e7ec7d3afb24e953923c7d Mon Sep 17 00:00:00 2001
d1681e
From: Ashish Pandey <aspandey@redhat.com>
d1681e
Date: Tue, 13 Mar 2018 14:03:20 +0530
d1681e
Subject: [PATCH 198/201] cluster/ec: Change default read policy to gfid-hash
d1681e
d1681e
Problem:
d1681e
Whenever we read data from file over NFS, NFS reads
d1681e
more data than requested and caches it. Based on the
d1681e
stat information, it checks whether the cached/pre-read
d1681e
data is valid or not.
d1681e
d1681e
Consider 4 + 2 EC volume and all the bricks are on
d1681e
different nodes.
d1681e
d1681e
In EC, with round-robin read policy, reads are sent on
d1681e
different sets of data bricks. This way, it balances the
d1681e
read fops to go on all the bricks and avoid heating UP
d1681e
(overloading) same set of bricks.
d1681e
d1681e
Due to small difference in clock speed, it is possible
d1681e
that we get minor difference for atime, mtime or ctime
d1681e
for different bricks. That might cause a different stat
d1681e
returned to NFS based on which NFS will discard
d1681e
cached/pre-read data which is actually not changed and
d1681e
could be used.
d1681e
d1681e
Solution:
d1681e
Change the default read policy for EC to gfid-hash. That will force
d1681e
all the reads to go to the same set of bricks.
d1681e
d1681e
>Change-Id: I825441cc519e94bf3dc3aa0bd4cb7c6ae6392c84
d1681e
>BUG: 1554743
d1681e
>Signed-off-by: Ashish Pandey <aspandey@redhat.com>
d1681e
d1681e
upstream patch: https://review.gluster.org/#/c/19703/
d1681e
d1681e
Change-Id: I43e95717980ca52c228fdcb7863c58bd4d14151c
d1681e
BUG: 1559084
d1681e
Signed-off-by: Ashish Pandey <aspandey@redhat.com>
d1681e
Reviewed-on: https://code.engineering.redhat.com/gerrit/133746
d1681e
Tested-by: RHGS Build Bot <nigelb@redhat.com>
d1681e
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
d1681e
---
d1681e
 tests/basic/ec/ec-read-policy.t | 7 +++----
d1681e
 xlators/cluster/ec/src/ec.c     | 2 +-
d1681e
 2 files changed, 4 insertions(+), 5 deletions(-)
d1681e
d1681e
diff --git a/tests/basic/ec/ec-read-policy.t b/tests/basic/ec/ec-read-policy.t
d1681e
index e4390aa..fe6fe65 100644
d1681e
--- a/tests/basic/ec/ec-read-policy.t
d1681e
+++ b/tests/basic/ec/ec-read-policy.t
d1681e
@@ -20,10 +20,9 @@ TEST $CLI volume start $V0
d1681e
 TEST glusterfs --direct-io-mode=yes --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 $M0
d1681e
 EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0
d1681e
 #TEST volume operations work fine
d1681e
-EXPECT "round-robin" mount_get_option_value $M0 $V0-disperse-0 read-policy
d1681e
-TEST $CLI volume set $V0 disperse.read-policy gfid-hash
d1681e
-EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "gfid-hash" mount_get_option_value $M0 $V0-disperse-0 read-policy
d1681e
-TEST $CLI volume reset $V0 disperse.read-policy
d1681e
+
d1681e
+EXPECT "gfid-hash" mount_get_option_value $M0 $V0-disperse-0 read-policy
d1681e
+TEST $CLI volume set $V0 disperse.read-policy round-robin
d1681e
 EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "round-robin" mount_get_option_value $M0 $V0-disperse-0 read-policy
d1681e
 
d1681e
 #TEST if the option gives the intended behavior. The way we perform this test
d1681e
diff --git a/xlators/cluster/ec/src/ec.c b/xlators/cluster/ec/src/ec.c
d1681e
index 13ce7fb..bfdca64 100644
d1681e
--- a/xlators/cluster/ec/src/ec.c
d1681e
+++ b/xlators/cluster/ec/src/ec.c
d1681e
@@ -1447,7 +1447,7 @@ struct volume_options options[] =
d1681e
     { .key = {"read-policy" },
d1681e
       .type = GF_OPTION_TYPE_STR,
d1681e
       .value = {"round-robin", "gfid-hash"},
d1681e
-      .default_value = "round-robin",
d1681e
+      .default_value = "gfid-hash",
d1681e
       .description = "inode-read fops happen only on 'k' number of bricks in"
d1681e
               " n=k+m disperse subvolume. 'round-robin' selects the read"
d1681e
               " subvolume using round-robin algo. 'gfid-hash' selects read"
d1681e
-- 
d1681e
1.8.3.1
d1681e