|
|
12a457 |
From a468ff0af57528dbd7a711f985c0524251ec90d3 Mon Sep 17 00:00:00 2001
|
|
|
12a457 |
From: Ravishankar N <ravishankar@redhat.com>
|
|
|
12a457 |
Date: Tue, 5 Apr 2016 11:40:05 +0530
|
|
|
12a457 |
Subject: [PATCH 40/80] afr: Add throttled background client-side heals
|
|
|
12a457 |
|
|
|
12a457 |
Backport of: http://review.gluster.org/13207
|
|
|
12a457 |
|
|
|
12a457 |
If a heal is needed after inode refresh (lookup, read_txn), launch it in
|
|
|
12a457 |
the background instead of blocking the fop (that triggered refresh)
|
|
|
12a457 |
until the heal happens.
|
|
|
12a457 |
|
|
|
12a457 |
afr_replies_interpret() is modified such that the heal is
|
|
|
12a457 |
launched only if atleast one sink brick is up.
|
|
|
12a457 |
|
|
|
12a457 |
Max. no of heals that can happen in parallel is configurable via the
|
|
|
12a457 |
'background-self-heal-count' volume option. Any number greater than that
|
|
|
12a457 |
is put in a wait queue whose length is configurable via
|
|
|
12a457 |
'heal-wait-queue-leng' volume option. If the wait queue is also full,
|
|
|
12a457 |
further heals will be ignored.
|
|
|
12a457 |
|
|
|
12a457 |
Default values: background-self-heal-count=8, heal-wait-queue-leng=128
|
|
|
12a457 |
|
|
|
12a457 |
Change-Id: Ief20b915f8b3064dfbde41e9216b080de45f31f5
|
|
|
12a457 |
BUG: 1300875
|
|
|
12a457 |
Signed-off-by: Ravishankar N <ravishankar@redhat.com>
|
|
|
12a457 |
Reviewed-on: https://code.engineering.redhat.com/gerrit/71393
|
|
|
12a457 |
Reviewed-by: Pranith Kumar Karampuri <pkarampu@redhat.com>
|
|
|
12a457 |
Tested-by: Pranith Kumar Karampuri <pkarampu@redhat.com>
|
|
|
12a457 |
---
|
|
|
12a457 |
tests/basic/afr/client-side-heal.t | 1 +
|
|
|
12a457 |
tests/bugs/glusterd/859927/repl.t | 4 +-
|
|
|
12a457 |
tests/bugs/quota/bug-1035576.t | 1 -
|
|
|
12a457 |
tests/bugs/replicate/bug-802417.t | 5 +-
|
|
|
12a457 |
tests/bugs/replicate/bug-977797.t | 52 +++++------
|
|
|
12a457 |
xlators/cluster/afr/src/afr-common.c | 89 +++++++++----------
|
|
|
12a457 |
xlators/cluster/afr/src/afr-dir-write.c | 2 +-
|
|
|
12a457 |
xlators/cluster/afr/src/afr-self-heal-common.c | 110 +++++++++++++++++++++++
|
|
|
12a457 |
xlators/cluster/afr/src/afr-self-heal.h | 3 +
|
|
|
12a457 |
xlators/cluster/afr/src/afr.c | 26 +++++-
|
|
|
12a457 |
xlators/cluster/afr/src/afr.h | 28 +++++--
|
|
|
12a457 |
xlators/mgmt/glusterd/src/glusterd-volume-set.c | 8 ++
|
|
|
12a457 |
12 files changed, 237 insertions(+), 92 deletions(-)
|
|
|
12a457 |
|
|
|
12a457 |
diff --git a/tests/basic/afr/client-side-heal.t b/tests/basic/afr/client-side-heal.t
|
|
|
12a457 |
index 18f7626..d87f4b1 100644
|
|
|
12a457 |
--- a/tests/basic/afr/client-side-heal.t
|
|
|
12a457 |
+++ b/tests/basic/afr/client-side-heal.t
|
|
|
12a457 |
@@ -70,6 +70,7 @@ EXPECT 7 get_pending_heal_count $V0
|
|
|
12a457 |
EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
|
|
|
12a457 |
TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 --entry-timeout=0 $M0;
|
|
|
12a457 |
TEST cat $M0/datafile
|
|
|
12a457 |
+EXPECT_WITHIN $HEAL_TIMEOUT 6 get_pending_heal_count $V0
|
|
|
12a457 |
|
|
|
12a457 |
EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
|
|
|
12a457 |
TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 --entry-timeout=0 $M0;
|
|
|
12a457 |
diff --git a/tests/bugs/glusterd/859927/repl.t b/tests/bugs/glusterd/859927/repl.t
|
|
|
12a457 |
index a500961..40e8602 100755
|
|
|
12a457 |
--- a/tests/bugs/glusterd/859927/repl.t
|
|
|
12a457 |
+++ b/tests/bugs/glusterd/859927/repl.t
|
|
|
12a457 |
@@ -23,7 +23,6 @@ TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{1,2};
|
|
|
12a457 |
TEST $CLI volume set $V0 cluster.self-heal-daemon off
|
|
|
12a457 |
TEST $CLI volume set $V0 performance.stat-prefetch off
|
|
|
12a457 |
TEST $CLI volume set $V0 client-log-level DEBUG
|
|
|
12a457 |
-TEST $CLI volume set $V0 cluster.background-self-heal-count 0
|
|
|
12a457 |
TEST $CLI volume start $V0
|
|
|
12a457 |
TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id=$V0 $M0;
|
|
|
12a457 |
|
|
|
12a457 |
@@ -34,6 +33,7 @@ EXPECT full volume_option $V0 cluster.data-self-heal-algorithm
|
|
|
12a457 |
create_setup_for_self_heal $M0/a
|
|
|
12a457 |
EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
|
|
|
12a457 |
cat $file 2>&1 > /dev/null
|
|
|
12a457 |
+EXPECT_WITHIN $HEAL_TIMEOUT 0 get_pending_heal_count $V0
|
|
|
12a457 |
TEST cmp $B0/${V0}1/a $B0/${V0}2/a
|
|
|
12a457 |
|
|
|
12a457 |
TEST $CLI volume set $V0 cluster.data-self-heal-algorithm diff
|
|
|
12a457 |
@@ -41,12 +41,14 @@ EXPECT diff volume_option $V0 cluster.data-self-heal-algorithm
|
|
|
12a457 |
create_setup_for_self_heal $M0/a
|
|
|
12a457 |
EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
|
|
|
12a457 |
cat $file 2>&1 > /dev/null
|
|
|
12a457 |
+EXPECT_WITHIN $HEAL_TIMEOUT 0 get_pending_heal_count $V0
|
|
|
12a457 |
TEST cmp $B0/${V0}1/a $B0/${V0}2/a
|
|
|
12a457 |
|
|
|
12a457 |
TEST $CLI volume reset $V0 cluster.data-self-heal-algorithm
|
|
|
12a457 |
create_setup_for_self_heal $M0/a
|
|
|
12a457 |
EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
|
|
|
12a457 |
cat $file 2>&1 > /dev/null
|
|
|
12a457 |
+EXPECT_WITHIN $HEAL_TIMEOUT 0 get_pending_heal_count $V0
|
|
|
12a457 |
TEST cmp $B0/${V0}1/a $B0/${V0}2/a
|
|
|
12a457 |
|
|
|
12a457 |
TEST ! $CLI volume set $V0 cluster.data-self-heal-algorithm ""
|
|
|
12a457 |
diff --git a/tests/bugs/quota/bug-1035576.t b/tests/bugs/quota/bug-1035576.t
|
|
|
12a457 |
index 99b3925..eaf4439 100644
|
|
|
12a457 |
--- a/tests/bugs/quota/bug-1035576.t
|
|
|
12a457 |
+++ b/tests/bugs/quota/bug-1035576.t
|
|
|
12a457 |
@@ -17,7 +17,6 @@ TEST $CLI volume set $V0 performance.io-cache off
|
|
|
12a457 |
TEST $CLI volume set $V0 performance.write-behind off
|
|
|
12a457 |
TEST $CLI volume set $V0 performance.stat-prefetch off
|
|
|
12a457 |
TEST $CLI volume set $V0 performance.read-ahead off
|
|
|
12a457 |
-TEST $CLI volume set $V0 background-self-heal-count 0
|
|
|
12a457 |
TEST $CLI volume set $V0 self-heal-daemon off
|
|
|
12a457 |
TEST $CLI volume quota $V0 enable
|
|
|
12a457 |
|
|
|
12a457 |
diff --git a/tests/bugs/replicate/bug-802417.t b/tests/bugs/replicate/bug-802417.t
|
|
|
12a457 |
index df989b1..c5ba98b 100755
|
|
|
12a457 |
--- a/tests/bugs/replicate/bug-802417.t
|
|
|
12a457 |
+++ b/tests/bugs/replicate/bug-802417.t
|
|
|
12a457 |
@@ -32,7 +32,6 @@ TEST $CLI volume set $V0 performance.stat-prefetch off
|
|
|
12a457 |
## Make sure automatic self-heal doesn't perturb our results.
|
|
|
12a457 |
TEST $CLI volume set $V0 cluster.self-heal-daemon off
|
|
|
12a457 |
TEST $CLI volume set $V0 cluster.data-self-heal on
|
|
|
12a457 |
-TEST $CLI volume set $V0 cluster.background-self-heal-count 0
|
|
|
12a457 |
|
|
|
12a457 |
## Start volume and verify
|
|
|
12a457 |
TEST $CLI volume start $V0;
|
|
|
12a457 |
@@ -70,8 +69,8 @@ tgt_xattr_2="trusted.afr.${V0}-client-2"
|
|
|
12a457 |
actual=$(afr_get_changelog_xattr $obs_path_0 $tgt_xattr_0)
|
|
|
12a457 |
EXPECT "0x000000000000000000000000|^\$" echo $actual
|
|
|
12a457 |
|
|
|
12a457 |
-actual=$(afr_get_changelog_xattr $obs_path_0 $tgt_xattr_1)
|
|
|
12a457 |
-EXPECT "0x000000000000000000000000|^\$" echo $actual
|
|
|
12a457 |
+EXPECT_WITHIN $HEAL_TIMEOUT "0x000000000000000000000000" \
|
|
|
12a457 |
+afr_get_changelog_xattr $obs_path_0 $tgt_xattr_1
|
|
|
12a457 |
|
|
|
12a457 |
actual=$(afr_get_changelog_xattr $obs_path_0 $tgt_xattr_2)
|
|
|
12a457 |
EXPECT "0x000000030000000000000000" echo $actual
|
|
|
12a457 |
diff --git a/tests/bugs/replicate/bug-977797.t b/tests/bugs/replicate/bug-977797.t
|
|
|
12a457 |
index 3ff14ec..72c616b 100755
|
|
|
12a457 |
--- a/tests/bugs/replicate/bug-977797.t
|
|
|
12a457 |
+++ b/tests/bugs/replicate/bug-977797.t
|
|
|
12a457 |
@@ -26,7 +26,6 @@ TEST $CLI volume set $V0 quick-read off
|
|
|
12a457 |
TEST $CLI volume set $V0 read-ahead off
|
|
|
12a457 |
TEST $CLI volume set $V0 write-behind off
|
|
|
12a457 |
TEST $CLI volume set $V0 io-cache off
|
|
|
12a457 |
-TEST $CLI volume set $V0 background-self-heal-count 0
|
|
|
12a457 |
|
|
|
12a457 |
TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0
|
|
|
12a457 |
|
|
|
12a457 |
@@ -56,34 +55,29 @@ EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1;
|
|
|
12a457 |
|
|
|
12a457 |
TEST dd if=$M0/a/file of=/dev/null bs=1024k
|
|
|
12a457 |
|
|
|
12a457 |
-b1c0dir=$(afr_get_specific_changelog_xattr $B0/$V0"1"/a \
|
|
|
12a457 |
- trusted.afr.$V0-client-0 "entry")
|
|
|
12a457 |
-b1c1dir=$(afr_get_specific_changelog_xattr $B0/$V0"1"/a \
|
|
|
12a457 |
- trusted.afr.$V0-client-1 "entry")
|
|
|
12a457 |
-b2c0dir=$(afr_get_specific_changelog_xattr \
|
|
|
12a457 |
- $B0/$V0"2"/a trusted.afr.$V0-client-0 "entry")
|
|
|
12a457 |
-b2c1dir=$(afr_get_specific_changelog_xattr \
|
|
|
12a457 |
- $B0/$V0"2"/a trusted.afr.$V0-client-1 "entry")
|
|
|
12a457 |
-
|
|
|
12a457 |
-
|
|
|
12a457 |
-b1c0f=$(afr_get_specific_changelog_xattr $B0/$V0"1"/a/file \
|
|
|
12a457 |
- trusted.afr.$V0-client-0 "data")
|
|
|
12a457 |
-b1c1f=$(afr_get_specific_changelog_xattr $B0/$V0"1"/a/file \
|
|
|
12a457 |
- trusted.afr.$V0-client-1 "data")
|
|
|
12a457 |
-b2c0f=$(afr_get_specific_changelog_xattr $B0/$V0"2"/a/file \
|
|
|
12a457 |
- trusted.afr.$V0-client-0 "data")
|
|
|
12a457 |
-b2c1f=$(afr_get_specific_changelog_xattr $B0/$V0"2"/a/file \
|
|
|
12a457 |
- trusted.afr.$V0-client-1 "data")
|
|
|
12a457 |
-
|
|
|
12a457 |
-EXPECT "00000000|^$" echo $b1c0f
|
|
|
12a457 |
-EXPECT "00000000|^$" echo $b1c1f
|
|
|
12a457 |
-EXPECT "00000000|^$" echo $b2c0f
|
|
|
12a457 |
-EXPECT "00000000|^$" echo $b2c1f
|
|
|
12a457 |
-
|
|
|
12a457 |
-EXPECT "00000000|^$" echo $b1c0dir
|
|
|
12a457 |
-EXPECT "00000000|^$" echo $b1c1dir
|
|
|
12a457 |
-EXPECT "00000000|^$" echo $b2c0dir
|
|
|
12a457 |
-EXPECT "00000000|^$" echo $b2c1dir
|
|
|
12a457 |
+EXPECT_WITHIN $HEAL_TIMEOUT "00000000" \
|
|
|
12a457 |
+afr_get_specific_changelog_xattr $B0/$V0"1"/a/file trusted.afr.$V0-client-0 "data"
|
|
|
12a457 |
+
|
|
|
12a457 |
+EXPECT_WITHIN $HEAL_TIMEOUT "00000000" \
|
|
|
12a457 |
+afr_get_specific_changelog_xattr $B0/$V0"1"/a/file trusted.afr.$V0-client-1 "data"
|
|
|
12a457 |
+
|
|
|
12a457 |
+EXPECT_WITHIN $HEAL_TIMEOUT "00000000" \
|
|
|
12a457 |
+afr_get_specific_changelog_xattr $B0/$V0"2"/a/file trusted.afr.$V0-client-0 "data"
|
|
|
12a457 |
+
|
|
|
12a457 |
+EXPECT_WITHIN $HEAL_TIMEOUT "00000000" \
|
|
|
12a457 |
+afr_get_specific_changelog_xattr $B0/$V0"2"/a/file trusted.afr.$V0-client-1 "data"
|
|
|
12a457 |
+
|
|
|
12a457 |
+EXPECT_WITHIN $HEAL_TIMEOUT "00000000" \
|
|
|
12a457 |
+afr_get_specific_changelog_xattr $B0/$V0"1"/a trusted.afr.$V0-client-0 "entry"
|
|
|
12a457 |
+
|
|
|
12a457 |
+EXPECT_WITHIN HEAL_TIMEOUT "00000000" \
|
|
|
12a457 |
+afr_get_specific_changelog_xattr $B0/$V0"1"/a trusted.afr.$V0-client-1 "entry"
|
|
|
12a457 |
+
|
|
|
12a457 |
+EXPECT_WITHIN $HEAL_TIMEOUT "00000000" \
|
|
|
12a457 |
+afr_get_specific_changelog_xattr $B0/$V0"2"/a trusted.afr.$V0-client-0 "entry"
|
|
|
12a457 |
+
|
|
|
12a457 |
+EXPECT_WITHIN $HEAL_TIMEOUT "00000000" \
|
|
|
12a457 |
+afr_get_specific_changelog_xattr $B0/$V0"2"/a trusted.afr.$V0-client-1 "entry"
|
|
|
12a457 |
|
|
|
12a457 |
## Finish up
|
|
|
12a457 |
TEST $CLI volume stop $V0;
|
|
|
12a457 |
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
|
|
|
12a457 |
index b232a2d..b5d07ac 100644
|
|
|
12a457 |
--- a/xlators/cluster/afr/src/afr-common.c
|
|
|
12a457 |
+++ b/xlators/cluster/afr/src/afr-common.c
|
|
|
12a457 |
@@ -697,7 +697,8 @@ afr_accuse_smallfiles (xlator_t *this, struct afr_reply *replies,
|
|
|
12a457 |
}
|
|
|
12a457 |
|
|
|
12a457 |
int
|
|
|
12a457 |
-afr_replies_interpret (call_frame_t *frame, xlator_t *this, inode_t *inode)
|
|
|
12a457 |
+afr_replies_interpret (call_frame_t *frame, xlator_t *this, inode_t *inode,
|
|
|
12a457 |
+ gf_boolean_t *start_heal)
|
|
|
12a457 |
{
|
|
|
12a457 |
afr_local_t *local = NULL;
|
|
|
12a457 |
afr_private_t *priv = NULL;
|
|
|
12a457 |
@@ -777,6 +778,13 @@ afr_replies_interpret (call_frame_t *frame, xlator_t *this, inode_t *inode)
|
|
|
12a457 |
}
|
|
|
12a457 |
}
|
|
|
12a457 |
|
|
|
12a457 |
+ for (i = 0; i < priv->child_count; i++) {
|
|
|
12a457 |
+ if (start_heal && priv->child_up[i] &&
|
|
|
12a457 |
+ (!data_readable[i] || !metadata_readable[i])) {
|
|
|
12a457 |
+ *start_heal = _gf_true;
|
|
|
12a457 |
+ break;
|
|
|
12a457 |
+ }
|
|
|
12a457 |
+ }
|
|
|
12a457 |
afr_inode_read_subvol_set (inode, this, data_readable,
|
|
|
12a457 |
metadata_readable, event_generation);
|
|
|
12a457 |
return ret;
|
|
|
12a457 |
@@ -815,36 +823,6 @@ ret:
|
|
|
12a457 |
return -err;
|
|
|
12a457 |
}
|
|
|
12a457 |
|
|
|
12a457 |
-
|
|
|
12a457 |
-int
|
|
|
12a457 |
-afr_refresh_selfheal_wrap (void *opaque)
|
|
|
12a457 |
-{
|
|
|
12a457 |
- call_frame_t *frame = opaque;
|
|
|
12a457 |
- afr_local_t *local = NULL;
|
|
|
12a457 |
- xlator_t *this = NULL;
|
|
|
12a457 |
- int err = 0;
|
|
|
12a457 |
-
|
|
|
12a457 |
- local = frame->local;
|
|
|
12a457 |
- this = frame->this;
|
|
|
12a457 |
-
|
|
|
12a457 |
- afr_selfheal (frame->this, local->refreshinode->gfid);
|
|
|
12a457 |
-
|
|
|
12a457 |
- afr_selfheal_unlocked_discover (frame, local->refreshinode,
|
|
|
12a457 |
- local->refreshinode->gfid,
|
|
|
12a457 |
- local->replies);
|
|
|
12a457 |
-
|
|
|
12a457 |
- afr_replies_interpret (frame, this, local->refreshinode);
|
|
|
12a457 |
-
|
|
|
12a457 |
- err = afr_inode_refresh_err (frame, this);
|
|
|
12a457 |
-
|
|
|
12a457 |
- afr_local_replies_wipe (local, this->private);
|
|
|
12a457 |
-
|
|
|
12a457 |
- local->refreshfn (frame, this, err);
|
|
|
12a457 |
-
|
|
|
12a457 |
- return 0;
|
|
|
12a457 |
-}
|
|
|
12a457 |
-
|
|
|
12a457 |
-
|
|
|
12a457 |
gf_boolean_t
|
|
|
12a457 |
afr_selfheal_enabled (xlator_t *this)
|
|
|
12a457 |
{
|
|
|
12a457 |
@@ -860,35 +838,43 @@ afr_selfheal_enabled (xlator_t *this)
|
|
|
12a457 |
return data || priv->metadata_self_heal || priv->entry_self_heal;
|
|
|
12a457 |
}
|
|
|
12a457 |
|
|
|
12a457 |
-
|
|
|
12a457 |
int
|
|
|
12a457 |
afr_inode_refresh_done (call_frame_t *frame, xlator_t *this)
|
|
|
12a457 |
{
|
|
|
12a457 |
- call_frame_t *heal = NULL;
|
|
|
12a457 |
+ call_frame_t *heal_frame = NULL;
|
|
|
12a457 |
afr_local_t *local = NULL;
|
|
|
12a457 |
+ gf_boolean_t start_heal = _gf_false;
|
|
|
12a457 |
+ afr_local_t *heal_local = NULL;
|
|
|
12a457 |
+ int op_errno = ENOMEM;
|
|
|
12a457 |
int ret = 0;
|
|
|
12a457 |
int err = 0;
|
|
|
12a457 |
|
|
|
12a457 |
local = frame->local;
|
|
|
12a457 |
|
|
|
12a457 |
- ret = afr_replies_interpret (frame, this, local->refreshinode);
|
|
|
12a457 |
+ ret = afr_replies_interpret (frame, this, local->refreshinode,
|
|
|
12a457 |
+ &start_heal);
|
|
|
12a457 |
|
|
|
12a457 |
err = afr_inode_refresh_err (frame, this);
|
|
|
12a457 |
|
|
|
12a457 |
afr_local_replies_wipe (local, this->private);
|
|
|
12a457 |
|
|
|
12a457 |
- if (ret && afr_selfheal_enabled (this)) {
|
|
|
12a457 |
- heal = copy_frame (frame);
|
|
|
12a457 |
- if (heal)
|
|
|
12a457 |
- heal->root->pid = GF_CLIENT_PID_SELF_HEALD;
|
|
|
12a457 |
- ret = synctask_new (this->ctx->env, afr_refresh_selfheal_wrap,
|
|
|
12a457 |
- afr_refresh_selfheal_done, heal, frame);
|
|
|
12a457 |
- if (ret)
|
|
|
12a457 |
- goto refresh_done;
|
|
|
12a457 |
- } else {
|
|
|
12a457 |
- refresh_done:
|
|
|
12a457 |
- local->refreshfn (frame, this, err);
|
|
|
12a457 |
- }
|
|
|
12a457 |
+ if (ret && afr_selfheal_enabled (this) && start_heal) {
|
|
|
12a457 |
+ heal_frame = copy_frame (frame);
|
|
|
12a457 |
+ if (!heal_frame)
|
|
|
12a457 |
+ goto refresh_done;
|
|
|
12a457 |
+ heal_frame->root->pid = GF_CLIENT_PID_SELF_HEALD;
|
|
|
12a457 |
+ heal_local = AFR_FRAME_INIT (heal_frame, op_errno);
|
|
|
12a457 |
+ if (!heal_local) {
|
|
|
12a457 |
+ AFR_STACK_DESTROY (heal_frame);
|
|
|
12a457 |
+ goto refresh_done;
|
|
|
12a457 |
+ }
|
|
|
12a457 |
+ heal_local->refreshinode = inode_ref (local->refreshinode);
|
|
|
12a457 |
+ heal_local->heal_frame = heal_frame;
|
|
|
12a457 |
+ afr_throttled_selfheal (heal_frame, this);
|
|
|
12a457 |
+ }
|
|
|
12a457 |
+
|
|
|
12a457 |
+refresh_done:
|
|
|
12a457 |
+ local->refreshfn (frame, this, err);
|
|
|
12a457 |
|
|
|
12a457 |
return 0;
|
|
|
12a457 |
}
|
|
|
12a457 |
@@ -1785,7 +1771,7 @@ afr_lookup_done (call_frame_t *frame, xlator_t *this)
|
|
|
12a457 |
*/
|
|
|
12a457 |
gf_uuid_copy (args.gfid, read_gfid);
|
|
|
12a457 |
args.ia_type = ia_type;
|
|
|
12a457 |
- if (afr_replies_interpret (frame, this, local->inode)) {
|
|
|
12a457 |
+ if (afr_replies_interpret (frame, this, local->inode, NULL)) {
|
|
|
12a457 |
read_subvol = afr_read_subvol_decide (local->inode,
|
|
|
12a457 |
this, &args);
|
|
|
12a457 |
afr_inode_read_subvol_reset (local->inode, this);
|
|
|
12a457 |
@@ -2246,7 +2232,7 @@ afr_discover_done (call_frame_t *frame, xlator_t *this)
|
|
|
12a457 |
goto unwind;
|
|
|
12a457 |
}
|
|
|
12a457 |
|
|
|
12a457 |
- afr_replies_interpret (frame, this, local->inode);
|
|
|
12a457 |
+ afr_replies_interpret (frame, this, local->inode, NULL);
|
|
|
12a457 |
|
|
|
12a457 |
read_subvol = afr_read_subvol_decide (local->inode, this, NULL);
|
|
|
12a457 |
if (read_subvol == -1) {
|
|
|
12a457 |
@@ -3899,6 +3885,12 @@ afr_priv_dump (xlator_t *this)
|
|
|
12a457 |
gf_proc_dump_write("favorite_child", "%d", priv->favorite_child);
|
|
|
12a457 |
gf_proc_dump_write("wait_count", "%u", priv->wait_count);
|
|
|
12a457 |
gf_proc_dump_write("quorum-reads", "%d", priv->quorum_reads);
|
|
|
12a457 |
+ gf_proc_dump_write("heal-wait-queue-length", "%d",
|
|
|
12a457 |
+ priv->heal_wait_qlen);
|
|
|
12a457 |
+ gf_proc_dump_write("heal-waiters", "%d", priv->heal_waiters);
|
|
|
12a457 |
+ gf_proc_dump_write("background-self-heal-count", "%d",
|
|
|
12a457 |
+ priv->background_self_heal_count);
|
|
|
12a457 |
+ gf_proc_dump_write("healers", "%d", priv->healers);
|
|
|
12a457 |
|
|
|
12a457 |
return 0;
|
|
|
12a457 |
}
|
|
|
12a457 |
@@ -4205,6 +4197,7 @@ afr_local_init (afr_local_t *local, afr_private_t *priv, int32_t *op_errno)
|
|
|
12a457 |
goto out;
|
|
|
12a457 |
}
|
|
|
12a457 |
|
|
|
12a457 |
+ INIT_LIST_HEAD (&local->healer);
|
|
|
12a457 |
return 0;
|
|
|
12a457 |
out:
|
|
|
12a457 |
return -1;
|
|
|
12a457 |
diff --git a/xlators/cluster/afr/src/afr-dir-write.c b/xlators/cluster/afr/src/afr-dir-write.c
|
|
|
12a457 |
index 3d586dd..887298b 100644
|
|
|
12a457 |
--- a/xlators/cluster/afr/src/afr-dir-write.c
|
|
|
12a457 |
+++ b/xlators/cluster/afr/src/afr-dir-write.c
|
|
|
12a457 |
@@ -93,7 +93,7 @@ __afr_dir_write_finalize (call_frame_t *frame, xlator_t *this)
|
|
|
12a457 |
priv = this->private;
|
|
|
12a457 |
|
|
|
12a457 |
if (local->inode) {
|
|
|
12a457 |
- afr_replies_interpret (frame, this, local->inode);
|
|
|
12a457 |
+ afr_replies_interpret (frame, this, local->inode, NULL);
|
|
|
12a457 |
inode_read_subvol = afr_data_subvol_get (local->inode, this,
|
|
|
12a457 |
NULL, NULL, NULL);
|
|
|
12a457 |
}
|
|
|
12a457 |
diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c
|
|
|
12a457 |
index 6e90de0..73d7e94 100644
|
|
|
12a457 |
--- a/xlators/cluster/afr/src/afr-self-heal-common.c
|
|
|
12a457 |
+++ b/xlators/cluster/afr/src/afr-self-heal-common.c
|
|
|
12a457 |
@@ -20,6 +20,9 @@
|
|
|
12a457 |
#include "protocol-common.h"
|
|
|
12a457 |
#include "afr-messages.h"
|
|
|
12a457 |
|
|
|
12a457 |
+void
|
|
|
12a457 |
+afr_heal_synctask (xlator_t *this, afr_local_t *local);
|
|
|
12a457 |
+
|
|
|
12a457 |
int
|
|
|
12a457 |
afr_selfheal_post_op_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
|
|
|
12a457 |
int op_ret, int op_errno, dict_t *xattr, dict_t *xdata)
|
|
|
12a457 |
@@ -1423,3 +1426,110 @@ afr_selfheal (xlator_t *this, uuid_t gfid)
|
|
|
12a457 |
|
|
|
12a457 |
return ret;
|
|
|
12a457 |
}
|
|
|
12a457 |
+
|
|
|
12a457 |
+afr_local_t*
|
|
|
12a457 |
+__afr_dequeue_heals (afr_private_t *priv)
|
|
|
12a457 |
+{
|
|
|
12a457 |
+ afr_local_t *local = NULL;
|
|
|
12a457 |
+
|
|
|
12a457 |
+ if (list_empty (&priv->heal_waiting))
|
|
|
12a457 |
+ goto none;
|
|
|
12a457 |
+ if ((priv->background_self_heal_count > 0) &&
|
|
|
12a457 |
+ (priv->healers >= priv->background_self_heal_count))
|
|
|
12a457 |
+ goto none;
|
|
|
12a457 |
+
|
|
|
12a457 |
+ local = list_entry (priv->heal_waiting.next, afr_local_t, healer);
|
|
|
12a457 |
+ priv->heal_waiters--;
|
|
|
12a457 |
+ GF_ASSERT (priv->heal_waiters >= 0);
|
|
|
12a457 |
+ list_del_init(&local->healer);
|
|
|
12a457 |
+ list_add(&local->healer, &priv->healing);
|
|
|
12a457 |
+ priv->healers++;
|
|
|
12a457 |
+ return local;
|
|
|
12a457 |
+none:
|
|
|
12a457 |
+ gf_msg_debug (THIS->name, 0, "Nothing dequeued. "
|
|
|
12a457 |
+ "Num healers: %d, Num Waiters: %d",
|
|
|
12a457 |
+ priv->healers, priv->heal_waiters);
|
|
|
12a457 |
+ return NULL;
|
|
|
12a457 |
+}
|
|
|
12a457 |
+
|
|
|
12a457 |
+int
|
|
|
12a457 |
+afr_refresh_selfheal_wrap (void *opaque)
|
|
|
12a457 |
+{
|
|
|
12a457 |
+ call_frame_t *heal_frame = opaque;
|
|
|
12a457 |
+ afr_local_t *local = heal_frame->local;
|
|
|
12a457 |
+ int ret = 0;
|
|
|
12a457 |
+
|
|
|
12a457 |
+ ret = afr_selfheal (heal_frame->this, local->refreshinode->gfid);
|
|
|
12a457 |
+ return ret;
|
|
|
12a457 |
+}
|
|
|
12a457 |
+
|
|
|
12a457 |
+int
|
|
|
12a457 |
+afr_refresh_heal_done (int ret, call_frame_t *frame, void *opaque)
|
|
|
12a457 |
+{
|
|
|
12a457 |
+ call_frame_t *heal_frame = opaque;
|
|
|
12a457 |
+ xlator_t *this = heal_frame->this;
|
|
|
12a457 |
+ afr_private_t *priv = this->private;
|
|
|
12a457 |
+ afr_local_t *local = heal_frame->local;
|
|
|
12a457 |
+
|
|
|
12a457 |
+ LOCK (&priv->lock);
|
|
|
12a457 |
+ {
|
|
|
12a457 |
+ list_del_init(&local->healer);
|
|
|
12a457 |
+ priv->healers--;
|
|
|
12a457 |
+ GF_ASSERT (priv->healers >= 0);
|
|
|
12a457 |
+ local = __afr_dequeue_heals (priv);
|
|
|
12a457 |
+ }
|
|
|
12a457 |
+ UNLOCK (&priv->lock);
|
|
|
12a457 |
+
|
|
|
12a457 |
+ if (heal_frame)
|
|
|
12a457 |
+ AFR_STACK_DESTROY (heal_frame);
|
|
|
12a457 |
+
|
|
|
12a457 |
+ if (local)
|
|
|
12a457 |
+ afr_heal_synctask (this, local);
|
|
|
12a457 |
+ return 0;
|
|
|
12a457 |
+
|
|
|
12a457 |
+}
|
|
|
12a457 |
+
|
|
|
12a457 |
+void
|
|
|
12a457 |
+afr_heal_synctask (xlator_t *this, afr_local_t *local)
|
|
|
12a457 |
+{
|
|
|
12a457 |
+ int ret = 0;
|
|
|
12a457 |
+ call_frame_t *heal_frame = NULL;
|
|
|
12a457 |
+
|
|
|
12a457 |
+ heal_frame = local->heal_frame;
|
|
|
12a457 |
+ ret = synctask_new (this->ctx->env, afr_refresh_selfheal_wrap,
|
|
|
12a457 |
+ afr_refresh_heal_done, heal_frame, heal_frame);
|
|
|
12a457 |
+ if (ret < 0)
|
|
|
12a457 |
+ /* Heal not launched. Will be queued when the next inode
|
|
|
12a457 |
+ * refresh happens and shd hasn't healed it yet. */
|
|
|
12a457 |
+ afr_refresh_heal_done (ret, heal_frame, heal_frame);
|
|
|
12a457 |
+}
|
|
|
12a457 |
+
|
|
|
12a457 |
+void
|
|
|
12a457 |
+afr_throttled_selfheal (call_frame_t *frame, xlator_t *this)
|
|
|
12a457 |
+{
|
|
|
12a457 |
+ gf_boolean_t can_heal = _gf_true;
|
|
|
12a457 |
+ afr_private_t *priv = this->private;
|
|
|
12a457 |
+ afr_local_t *local = frame->local;
|
|
|
12a457 |
+
|
|
|
12a457 |
+ LOCK (&priv->lock);
|
|
|
12a457 |
+ {
|
|
|
12a457 |
+ if ((priv->background_self_heal_count > 0) &&
|
|
|
12a457 |
+ (priv->heal_wait_qlen + priv->background_self_heal_count) >
|
|
|
12a457 |
+ (priv->heal_waiters + priv->healers)) {
|
|
|
12a457 |
+ list_add_tail(&local->healer, &priv->heal_waiting);
|
|
|
12a457 |
+ priv->heal_waiters++;
|
|
|
12a457 |
+ local = __afr_dequeue_heals (priv);
|
|
|
12a457 |
+ } else {
|
|
|
12a457 |
+ can_heal = _gf_false;
|
|
|
12a457 |
+ }
|
|
|
12a457 |
+ }
|
|
|
12a457 |
+ UNLOCK (&priv->lock);
|
|
|
12a457 |
+
|
|
|
12a457 |
+ if (can_heal) {
|
|
|
12a457 |
+ if (local)
|
|
|
12a457 |
+ afr_heal_synctask (this, local);
|
|
|
12a457 |
+ else
|
|
|
12a457 |
+ gf_msg_debug (this->name, 0, "Max number of heals are "
|
|
|
12a457 |
+ "pending, background self-heal rejected.");
|
|
|
12a457 |
+ }
|
|
|
12a457 |
+}
|
|
|
12a457 |
diff --git a/xlators/cluster/afr/src/afr-self-heal.h b/xlators/cluster/afr/src/afr-self-heal.h
|
|
|
12a457 |
index 74e852a..b298fa1 100644
|
|
|
12a457 |
--- a/xlators/cluster/afr/src/afr-self-heal.h
|
|
|
12a457 |
+++ b/xlators/cluster/afr/src/afr-self-heal.h
|
|
|
12a457 |
@@ -85,6 +85,9 @@
|
|
|
12a457 |
int
|
|
|
12a457 |
afr_selfheal (xlator_t *this, uuid_t gfid);
|
|
|
12a457 |
|
|
|
12a457 |
+void
|
|
|
12a457 |
+afr_throttled_selfheal (call_frame_t *frame, xlator_t *this);
|
|
|
12a457 |
+
|
|
|
12a457 |
int
|
|
|
12a457 |
afr_selfheal_name (xlator_t *this, uuid_t gfid, const char *name,
|
|
|
12a457 |
void *gfid_req);
|
|
|
12a457 |
diff --git a/xlators/cluster/afr/src/afr.c b/xlators/cluster/afr/src/afr.c
|
|
|
12a457 |
index 5ef920a..d65895a 100644
|
|
|
12a457 |
--- a/xlators/cluster/afr/src/afr.c
|
|
|
12a457 |
+++ b/xlators/cluster/afr/src/afr.c
|
|
|
12a457 |
@@ -128,6 +128,10 @@ reconfigure (xlator_t *this, dict_t *options)
|
|
|
12a457 |
priv->background_self_heal_count, options, uint32,
|
|
|
12a457 |
out);
|
|
|
12a457 |
|
|
|
12a457 |
+ GF_OPTION_RECONF ("heal-wait-queue-length",
|
|
|
12a457 |
+ priv->heal_wait_qlen, options, uint32, out);
|
|
|
12a457 |
+
|
|
|
12a457 |
+
|
|
|
12a457 |
GF_OPTION_RECONF ("metadata-self-heal",
|
|
|
12a457 |
priv->metadata_self_heal, options, bool, out);
|
|
|
12a457 |
|
|
|
12a457 |
@@ -277,6 +281,8 @@ init (xlator_t *this)
|
|
|
12a457 |
priv->read_child = -1;
|
|
|
12a457 |
|
|
|
12a457 |
GF_OPTION_INIT ("arbiter-count", priv->arbiter_count, uint32, out);
|
|
|
12a457 |
+ INIT_LIST_HEAD (&priv->healing);
|
|
|
12a457 |
+ INIT_LIST_HEAD (&priv->heal_waiting);
|
|
|
12a457 |
|
|
|
12a457 |
priv->spb_choice_timeout = AFR_DEFAULT_SPB_CHOICE_TIMEOUT;
|
|
|
12a457 |
|
|
|
12a457 |
@@ -329,6 +335,9 @@ init (xlator_t *this)
|
|
|
12a457 |
GF_OPTION_INIT ("background-self-heal-count",
|
|
|
12a457 |
priv->background_self_heal_count, uint32, out);
|
|
|
12a457 |
|
|
|
12a457 |
+ GF_OPTION_INIT ("heal-wait-queue-length",
|
|
|
12a457 |
+ priv->heal_wait_qlen, uint32, out);
|
|
|
12a457 |
+
|
|
|
12a457 |
GF_OPTION_INIT ("data-self-heal", priv->data_self_heal, str, out);
|
|
|
12a457 |
|
|
|
12a457 |
GF_OPTION_INIT ("data-self-heal-algorithm",
|
|
|
12a457 |
@@ -587,10 +596,21 @@ struct volume_options options[] = {
|
|
|
12a457 |
{ .key = {"background-self-heal-count"},
|
|
|
12a457 |
.type = GF_OPTION_TYPE_INT,
|
|
|
12a457 |
.min = 0,
|
|
|
12a457 |
- .default_value = "16",
|
|
|
12a457 |
+ .max = 256,
|
|
|
12a457 |
+ .default_value = "8",
|
|
|
12a457 |
+ .validate = GF_OPT_VALIDATE_MIN,
|
|
|
12a457 |
+ .description = "This specifies the number of per client self-heal "
|
|
|
12a457 |
+ "jobs that can perform parallel heals in the "
|
|
|
12a457 |
+ "background."
|
|
|
12a457 |
+ },
|
|
|
12a457 |
+ { .key = {"heal-wait-queue-length"},
|
|
|
12a457 |
+ .type = GF_OPTION_TYPE_INT,
|
|
|
12a457 |
+ .min = 0,
|
|
|
12a457 |
+ .max = 10000, /*Around 100MB with sizeof(afr_local_t)= 10496 bytes*/
|
|
|
12a457 |
+ .default_value = "128",
|
|
|
12a457 |
.validate = GF_OPT_VALIDATE_MIN,
|
|
|
12a457 |
- .description = "This specifies the number of self-heals that can be "
|
|
|
12a457 |
- " performed in background without blocking the fop"
|
|
|
12a457 |
+ .description = "This specifies the number of heals that can be queued"
|
|
|
12a457 |
+ " for the parallel background self heal jobs."
|
|
|
12a457 |
},
|
|
|
12a457 |
{ .key = {"data-self-heal"},
|
|
|
12a457 |
.type = GF_OPTION_TYPE_STR,
|
|
|
12a457 |
diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h
|
|
|
12a457 |
index 52f9c51..9915344 100644
|
|
|
12a457 |
--- a/xlators/cluster/afr/src/afr.h
|
|
|
12a457 |
+++ b/xlators/cluster/afr/src/afr.h
|
|
|
12a457 |
@@ -74,8 +74,17 @@ typedef struct _afr_private {
|
|
|
12a457 |
unsigned int data_self_heal_window_size; /* max number of pipelined
|
|
|
12a457 |
read/writes */
|
|
|
12a457 |
|
|
|
12a457 |
- unsigned int background_self_heal_count;
|
|
|
12a457 |
- unsigned int background_self_heals_started;
|
|
|
12a457 |
+ struct list_head heal_waiting; /*queue for files that need heal*/
|
|
|
12a457 |
+ uint32_t heal_wait_qlen; /*configurable queue length for heal_waiting*/
|
|
|
12a457 |
+ int32_t heal_waiters; /* No. of elements currently in wait queue.*/
|
|
|
12a457 |
+
|
|
|
12a457 |
+ struct list_head healing;/* queue for files that are undergoing
|
|
|
12a457 |
+ background heal*/
|
|
|
12a457 |
+ uint32_t background_self_heal_count;/*configurable queue length for
|
|
|
12a457 |
+ healing queue*/
|
|
|
12a457 |
+ int32_t healers;/* No. of elements currently undergoing background
|
|
|
12a457 |
+ heal*/
|
|
|
12a457 |
+
|
|
|
12a457 |
gf_boolean_t metadata_self_heal; /* on/off */
|
|
|
12a457 |
gf_boolean_t entry_self_heal; /* on/off */
|
|
|
12a457 |
|
|
|
12a457 |
@@ -127,12 +136,14 @@ typedef struct _afr_private {
|
|
|
12a457 |
|
|
|
12a457 |
afr_self_heald_t shd;
|
|
|
12a457 |
|
|
|
12a457 |
- /* pump dependencies */
|
|
|
12a457 |
- void *pump_private;
|
|
|
12a457 |
- gf_boolean_t use_afr_in_pump;
|
|
|
12a457 |
gf_boolean_t consistent_metadata;
|
|
|
12a457 |
uint64_t spb_choice_timeout;
|
|
|
12a457 |
gf_boolean_t need_heal;
|
|
|
12a457 |
+
|
|
|
12a457 |
+ /* pump dependencies */
|
|
|
12a457 |
+ void *pump_private;
|
|
|
12a457 |
+ gf_boolean_t use_afr_in_pump;
|
|
|
12a457 |
+
|
|
|
12a457 |
} afr_private_t;
|
|
|
12a457 |
|
|
|
12a457 |
|
|
|
12a457 |
@@ -740,6 +751,10 @@ typedef struct _afr_local {
|
|
|
12a457 |
int xflag;
|
|
|
12a457 |
gf_boolean_t do_discovery;
|
|
|
12a457 |
struct afr_reply *replies;
|
|
|
12a457 |
+
|
|
|
12a457 |
+ /* For client side background heals. */
|
|
|
12a457 |
+ struct list_head healer;
|
|
|
12a457 |
+ call_frame_t *heal_frame;
|
|
|
12a457 |
} afr_local_t;
|
|
|
12a457 |
|
|
|
12a457 |
|
|
|
12a457 |
@@ -891,7 +906,8 @@ int
|
|
|
12a457 |
afr_locked_nodes_count (unsigned char *locked_nodes, int child_count);
|
|
|
12a457 |
|
|
|
12a457 |
int
|
|
|
12a457 |
-afr_replies_interpret (call_frame_t *frame, xlator_t *this, inode_t *inode);
|
|
|
12a457 |
+afr_replies_interpret (call_frame_t *frame, xlator_t *this, inode_t *inode,
|
|
|
12a457 |
+ gf_boolean_t *start_heal);
|
|
|
12a457 |
|
|
|
12a457 |
void
|
|
|
12a457 |
afr_local_replies_wipe (afr_local_t *local, afr_private_t *priv);
|
|
|
12a457 |
diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
|
|
|
12a457 |
index f5746c8..1b68c1b 100644
|
|
|
12a457 |
--- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c
|
|
|
12a457 |
+++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
|
|
|
12a457 |
@@ -1129,6 +1129,14 @@ struct volopt_map_entry glusterd_volopt_map[] = {
|
|
|
12a457 |
.op_version = GD_OP_VERSION_RHS_3_0_4,
|
|
|
12a457 |
.flags = OPT_FLAG_CLIENT_OPT
|
|
|
12a457 |
},
|
|
|
12a457 |
+ { .key = "cluster.heal-wait-queue-length",
|
|
|
12a457 |
+ .voltype = "cluster/replicate",
|
|
|
12a457 |
+ .type = DOC,
|
|
|
12a457 |
+ .op_version = GD_OP_VERSION_3_7_10,
|
|
|
12a457 |
+ .flags = OPT_FLAG_CLIENT_OPT
|
|
|
12a457 |
+ },
|
|
|
12a457 |
+
|
|
|
12a457 |
+ /* stripe xlator options */
|
|
|
12a457 |
{ .key = "cluster.stripe-block-size",
|
|
|
12a457 |
.voltype = "cluster/stripe",
|
|
|
12a457 |
.option = "block-size",
|
|
|
12a457 |
--
|
|
|
12a457 |
1.7.1
|
|
|
12a457 |
|