a3470f
From 8217d00a0a54457961e7ec7d3afb24e953923c7d Mon Sep 17 00:00:00 2001
a3470f
From: Ashish Pandey <aspandey@redhat.com>
a3470f
Date: Tue, 13 Mar 2018 14:03:20 +0530
a3470f
Subject: [PATCH 198/201] cluster/ec: Change default read policy to gfid-hash
a3470f
a3470f
Problem:
a3470f
Whenever we read data from file over NFS, NFS reads
a3470f
more data than requested and caches it. Based on the
a3470f
stat information, it checks whether the cached/pre-read
a3470f
data is valid or not.
a3470f
a3470f
Consider a 4 + 2 EC volume where all the bricks are on
a3470f
different nodes.
a3470f
a3470f
In EC, with round-robin read policy, reads are sent on
a3470f
different set of data bricks. This way, it balances the
a3470f
read fops to go on all the bricks and avoid heating UP
a3470f
(overloading) the same set of bricks.
a3470f
a3470f
Due to small differences in clock speed, it is possible
a3470f
that we get minor differences in atime, mtime or ctime
a3470f
for different bricks. That might cause a different stat to be
a3470f
returned to NFS based on which NFS will discard
a3470f
cached/pre-read data which is actually not changed and
a3470f
could be used.
a3470f
a3470f
Solution:
a3470f
Change the default read policy for EC to gfid-hash. That will force
a3470f
all the reads to go to the same set of bricks.
a3470f
a3470f
>Change-Id: I825441cc519e94bf3dc3aa0bd4cb7c6ae6392c84
a3470f
>BUG: 1554743
a3470f
>Signed-off-by: Ashish Pandey <aspandey@redhat.com>
a3470f
a3470f
upstream patch: https://review.gluster.org/#/c/19703/
a3470f
a3470f
Change-Id: I43e95717980ca52c228fdcb7863c58bd4d14151c
a3470f
BUG: 1559084
a3470f
Signed-off-by: Ashish Pandey <aspandey@redhat.com>
a3470f
Reviewed-on: https://code.engineering.redhat.com/gerrit/133746
a3470f
Tested-by: RHGS Build Bot <nigelb@redhat.com>
a3470f
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
a3470f
---
a3470f
 tests/basic/ec/ec-read-policy.t | 7 +++----
a3470f
 xlators/cluster/ec/src/ec.c     | 2 +-
a3470f
 2 files changed, 4 insertions(+), 5 deletions(-)
a3470f
a3470f
diff --git a/tests/basic/ec/ec-read-policy.t b/tests/basic/ec/ec-read-policy.t
a3470f
index e4390aa..fe6fe65 100644
a3470f
--- a/tests/basic/ec/ec-read-policy.t
a3470f
+++ b/tests/basic/ec/ec-read-policy.t
a3470f
@@ -20,10 +20,9 @@ TEST $CLI volume start $V0
a3470f
 TEST glusterfs --direct-io-mode=yes --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 $M0
a3470f
 EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0
a3470f
 #TEST volume operations work fine
a3470f
-EXPECT "round-robin" mount_get_option_value $M0 $V0-disperse-0 read-policy
a3470f
-TEST $CLI volume set $V0 disperse.read-policy gfid-hash
a3470f
-EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "gfid-hash" mount_get_option_value $M0 $V0-disperse-0 read-policy
a3470f
-TEST $CLI volume reset $V0 disperse.read-policy
a3470f
+
a3470f
+EXPECT "gfid-hash" mount_get_option_value $M0 $V0-disperse-0 read-policy
a3470f
+TEST $CLI volume set $V0 disperse.read-policy round-robin
a3470f
 EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "round-robin" mount_get_option_value $M0 $V0-disperse-0 read-policy
a3470f
 
a3470f
 #TEST if the option gives the intended behavior. The way we perform this test
a3470f
diff --git a/xlators/cluster/ec/src/ec.c b/xlators/cluster/ec/src/ec.c
a3470f
index 13ce7fb..bfdca64 100644
a3470f
--- a/xlators/cluster/ec/src/ec.c
a3470f
+++ b/xlators/cluster/ec/src/ec.c
a3470f
@@ -1447,7 +1447,7 @@ struct volume_options options[] =
a3470f
     { .key = {"read-policy" },
a3470f
       .type = GF_OPTION_TYPE_STR,
a3470f
       .value = {"round-robin", "gfid-hash"},
a3470f
-      .default_value = "round-robin",
a3470f
+      .default_value = "gfid-hash",
a3470f
       .description = "inode-read fops happen only on 'k' number of bricks in"
a3470f
               " n=k+m disperse subvolume. 'round-robin' selects the read"
a3470f
               " subvolume using round-robin algo. 'gfid-hash' selects read"
a3470f
-- 
a3470f
1.8.3.1
a3470f