|
|
cb8e9e |
From 3abed9139e839cb1393d27bce5b2459b96b5a19e Mon Sep 17 00:00:00 2001
|
|
|
cb8e9e |
From: Ravishankar N <ravishankar@redhat.com>
|
|
|
cb8e9e |
Date: Thu, 13 Aug 2015 18:33:08 +0530
|
|
|
cb8e9e |
Subject: [PATCH 288/304] afr: launch index heal on local subvols up on a child-up event
|
|
|
cb8e9e |
|
|
|
cb8e9e |
Patch in master: http://review.gluster.org/11912
|
|
|
cb8e9e |
Patch in release-3.7: http://review.gluster.org/#/c/11982/
|
|
|
cb8e9e |
|
|
|
cb8e9e |
Problem:
|
|
|
cb8e9e |
When a replica's child goes down and comes back up, the index heal is
|
|
|
cb8e9e |
triggered only on the child that just came up. This does not serve the
|
|
|
cb8e9e |
intended purpose, as the list of files that need to be healed
|
|
|
cb8e9e |
to this child is actually captured on the other child of the replica.
|
|
|
cb8e9e |
|
|
|
cb8e9e |
Fix:
|
|
|
cb8e9e |
Launch index-heal on all local children of the replica xlator which just
|
|
|
cb8e9e |
received a child-up event. Note that afr_selfheal_childup() eventually calls
|
|
|
cb8e9e |
afr_shd_index_healer() which will not run the heal on non-local
|
|
|
cb8e9e |
children.
|
|
|
cb8e9e |
|
|
|
cb8e9e |
Change-Id: Ic0d47728c45f1c2bfb443c3bcc551818e5f8680c
|
|
|
cb8e9e |
BUG: 1239021
|
|
|
cb8e9e |
Signed-off-by: Ravishankar N <ravishankar@redhat.com>
|
|
|
cb8e9e |
Reviewed-on: https://code.engineering.redhat.com/gerrit/56024
|
|
|
cb8e9e |
Reviewed-by: Krutika Dhananjay <kdhananj@redhat.com>
|
|
|
cb8e9e |
---
|
|
|
cb8e9e |
xlators/cluster/afr/src/afr-common.c | 28 +++++++++++-----------------
|
|
|
cb8e9e |
1 files changed, 11 insertions(+), 17 deletions(-)
|
|
|
cb8e9e |
|
|
|
cb8e9e |
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
|
|
|
cb8e9e |
index 31c6eff..2401cfd 100644
|
|
|
cb8e9e |
--- a/xlators/cluster/afr/src/afr-common.c
|
|
|
cb8e9e |
+++ b/xlators/cluster/afr/src/afr-common.c
|
|
|
cb8e9e |
@@ -3888,7 +3888,6 @@ afr_notify (xlator_t *this, int32_t event,
|
|
|
cb8e9e |
int idx = -1;
|
|
|
cb8e9e |
int ret = -1;
|
|
|
cb8e9e |
int call_psh = 0;
|
|
|
cb8e9e |
- int up_child = -1;
|
|
|
cb8e9e |
dict_t *input = NULL;
|
|
|
cb8e9e |
dict_t *output = NULL;
|
|
|
cb8e9e |
gf_boolean_t had_quorum = _gf_false;
|
|
|
cb8e9e |
@@ -3949,7 +3948,6 @@ afr_notify (xlator_t *this, int32_t event,
|
|
|
cb8e9e |
priv->child_up[idx] = 1;
|
|
|
cb8e9e |
|
|
|
cb8e9e |
call_psh = 1;
|
|
|
cb8e9e |
- up_child = idx;
|
|
|
cb8e9e |
for (i = 0; i < priv->child_count; i++)
|
|
|
cb8e9e |
if (priv->child_up[i] == 1)
|
|
|
cb8e9e |
up_children++;
|
|
|
cb8e9e |
@@ -4085,22 +4083,18 @@ afr_notify (xlator_t *this, int32_t event,
|
|
|
cb8e9e |
if (propagate)
|
|
|
cb8e9e |
ret = default_notify (this, event, data);
|
|
|
cb8e9e |
|
|
|
cb8e9e |
- if (!had_heard_from_all && have_heard_from_all && priv->shd.iamshd) {
|
|
|
cb8e9e |
- /*
|
|
|
cb8e9e |
- * Since self-heal is supposed to be launched only after
|
|
|
cb8e9e |
- * the responses from all the bricks are collected,
|
|
|
cb8e9e |
- * launch self-heals now on all up subvols.
|
|
|
cb8e9e |
- */
|
|
|
cb8e9e |
- for (i = 0; i < priv->child_count; i++)
|
|
|
cb8e9e |
- if (priv->child_up[i])
|
|
|
cb8e9e |
- afr_selfheal_childup (this, i);
|
|
|
cb8e9e |
- } else if (have_heard_from_all && call_psh && priv->shd.iamshd) {
|
|
|
cb8e9e |
- /*
|
|
|
cb8e9e |
- * Already heard from everyone. Just launch heal on now up
|
|
|
cb8e9e |
- * subvolume.
|
|
|
cb8e9e |
+ if ((!had_heard_from_all) || call_psh) {
|
|
|
cb8e9e |
+ /* Launch self-heal on all local subvolumes if:
|
|
|
cb8e9e |
+ * a) We have_heard_from_all for the first time
|
|
|
cb8e9e |
+ * b) Already heard from everyone, but we now got a child-up
|
|
|
cb8e9e |
+ * event.
|
|
|
cb8e9e |
*/
|
|
|
cb8e9e |
- afr_selfheal_childup (this, up_child);
|
|
|
cb8e9e |
- }
|
|
|
cb8e9e |
+ if (have_heard_from_all && priv->shd.iamshd) {
|
|
|
cb8e9e |
+ for (i = 0; i < priv->child_count; i++)
|
|
|
cb8e9e |
+ if (priv->child_up[i])
|
|
|
cb8e9e |
+ afr_selfheal_childup (this, i);
|
|
|
cb8e9e |
+ }
|
|
|
cb8e9e |
+ }
|
|
|
cb8e9e |
out:
|
|
|
cb8e9e |
return ret;
|
|
|
cb8e9e |
}
|
|
|
cb8e9e |
--
|
|
|
cb8e9e |
1.7.1
|
|
|
cb8e9e |
|