cb8e9e
From 3abed9139e839cb1393d27bce5b2459b96b5a19e Mon Sep 17 00:00:00 2001
cb8e9e
From: Ravishankar N <ravishankar@redhat.com>
cb8e9e
Date: Thu, 13 Aug 2015 18:33:08 +0530
cb8e9e
Subject: [PATCH 288/304] afr: launch index heal on local subvols up on a child-up event
cb8e9e
cb8e9e
Patch in master: http://review.gluster.org/11912
cb8e9e
Patch in release-3.7: http://review.gluster.org/#/c/11982/
cb8e9e
cb8e9e
Problem:
cb8e9e
When a replica's child goes down and comes up, the index heal is
cb8e9e
triggered only on the child that just came up. This does not serve the
cb8e9e
intended purpose as the list of files that need to be healed
cb8e9e
to this child is actually captured on the other child of the replica.
cb8e9e
cb8e9e
Fix:
cb8e9e
Launch index-heal on all local children of the replica xlator which just
cb8e9e
received a child-up event. Note that afr_selfheal_childup() eventually calls
cb8e9e
afr_shd_index_healer() which will not run the heal on non-local
cb8e9e
children.
cb8e9e
cb8e9e
Change-Id: Ic0d47728c45f1c2bfb443c3bcc551818e5f8680c
cb8e9e
BUG: 1239021
cb8e9e
Signed-off-by: Ravishankar N <ravishankar@redhat.com>
cb8e9e
Reviewed-on: https://code.engineering.redhat.com/gerrit/56024
cb8e9e
Reviewed-by: Krutika Dhananjay <kdhananj@redhat.com>
cb8e9e
---
cb8e9e
 xlators/cluster/afr/src/afr-common.c |   28 +++++++++++-----------------
cb8e9e
 1 files changed, 11 insertions(+), 17 deletions(-)
cb8e9e
cb8e9e
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
cb8e9e
index 31c6eff..2401cfd 100644
cb8e9e
--- a/xlators/cluster/afr/src/afr-common.c
cb8e9e
+++ b/xlators/cluster/afr/src/afr-common.c
cb8e9e
@@ -3888,7 +3888,6 @@ afr_notify (xlator_t *this, int32_t event,
cb8e9e
         int             idx                 = -1;
cb8e9e
         int             ret                 = -1;
cb8e9e
         int             call_psh            = 0;
cb8e9e
-        int             up_child            = -1;
cb8e9e
         dict_t          *input              = NULL;
cb8e9e
         dict_t          *output             = NULL;
cb8e9e
         gf_boolean_t    had_quorum          = _gf_false;
cb8e9e
@@ -3949,7 +3948,6 @@ afr_notify (xlator_t *this, int32_t event,
cb8e9e
                         priv->child_up[idx] = 1;
cb8e9e
 
cb8e9e
                         call_psh = 1;
cb8e9e
-                        up_child = idx;
cb8e9e
                         for (i = 0; i < priv->child_count; i++)
cb8e9e
                                 if (priv->child_up[i] == 1)
cb8e9e
                                         up_children++;
cb8e9e
@@ -4085,22 +4083,18 @@ afr_notify (xlator_t *this, int32_t event,
cb8e9e
         if (propagate)
cb8e9e
                 ret = default_notify (this, event, data);
cb8e9e
 
cb8e9e
-        if (!had_heard_from_all && have_heard_from_all && priv->shd.iamshd) {
cb8e9e
-                /*
cb8e9e
-                 * Since self-heal is supposed to be launched only after
cb8e9e
-                 * the responses from all the bricks are collected,
cb8e9e
-                 * launch self-heals now on all up subvols.
cb8e9e
-                 */
cb8e9e
-                for (i = 0; i < priv->child_count; i++)
cb8e9e
-                        if (priv->child_up[i])
cb8e9e
-                                afr_selfheal_childup (this, i);
cb8e9e
-        } else if (have_heard_from_all && call_psh && priv->shd.iamshd) {
cb8e9e
-                /*
cb8e9e
-                 * Already heard from everyone. Just launch heal on now up
cb8e9e
-                 * subvolume.
cb8e9e
+        if ((!had_heard_from_all) || call_psh) {
cb8e9e
+                /* Launch self-heal on all local subvolumes if:
cb8e9e
+                 * a) We have_heard_from_all for the first time
cb8e9e
+                 * b) Already heard from everyone, but we now got a child-up
cb8e9e
+                 *    event.
cb8e9e
                  */
cb8e9e
-                 afr_selfheal_childup (this, up_child);
cb8e9e
-	}
cb8e9e
+                if (have_heard_from_all && priv->shd.iamshd) {
cb8e9e
+                        for (i = 0; i < priv->child_count; i++)
cb8e9e
+                                if (priv->child_up[i])
cb8e9e
+                                        afr_selfheal_childup (this, i);
cb8e9e
+                }
cb8e9e
+        }
cb8e9e
 out:
cb8e9e
         return ret;
cb8e9e
 }
cb8e9e
-- 
cb8e9e
1.7.1
cb8e9e