Blob Blame History Raw
From 3abed9139e839cb1393d27bce5b2459b96b5a19e Mon Sep 17 00:00:00 2001
From: Ravishankar N <ravishankar@redhat.com>
Date: Thu, 13 Aug 2015 18:33:08 +0530
Subject: [PATCH 288/304] afr: launch index heal on local subvols up on a child-up event

Patch in master: http://review.gluster.org/11912
Patch in release-3.7: http://review.gluster.org/#/c/11982/

Problem:
When a replica's child goes down and comes back up, the index heal is
triggered only on the child that just came up. This does not serve the
intended purpose, because the list of files that need to be healed to
this child is actually captured on the other child of the replica.

Fix:
Launch index-heal on all local children of the replica xlator that just
received a child-up event. Note that afr_selfheal_childup() eventually
calls afr_shd_index_healer(), which will not run the heal on non-local
children.

Change-Id: Ic0d47728c45f1c2bfb443c3bcc551818e5f8680c
BUG: 1239021
Signed-off-by: Ravishankar N <ravishankar@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/56024
Reviewed-by: Krutika Dhananjay <kdhananj@redhat.com>
---
 xlators/cluster/afr/src/afr-common.c |   28 +++++++++++-----------------
 1 files changed, 11 insertions(+), 17 deletions(-)

diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
index 31c6eff..2401cfd 100644
--- a/xlators/cluster/afr/src/afr-common.c
+++ b/xlators/cluster/afr/src/afr-common.c
@@ -3888,7 +3888,6 @@ afr_notify (xlator_t *this, int32_t event,
         int             idx                 = -1;
         int             ret                 = -1;
         int             call_psh            = 0;
-        int             up_child            = -1;
         dict_t          *input              = NULL;
         dict_t          *output             = NULL;
         gf_boolean_t    had_quorum          = _gf_false;
@@ -3949,7 +3948,6 @@ afr_notify (xlator_t *this, int32_t event,
                         priv->child_up[idx] = 1;
 
                         call_psh = 1;
-                        up_child = idx;
                         for (i = 0; i < priv->child_count; i++)
                                 if (priv->child_up[i] == 1)
                                         up_children++;
@@ -4085,22 +4083,18 @@ afr_notify (xlator_t *this, int32_t event,
         if (propagate)
                 ret = default_notify (this, event, data);
 
-        if (!had_heard_from_all && have_heard_from_all && priv->shd.iamshd) {
-                /*
-                 * Since self-heal is supposed to be launched only after
-                 * the responses from all the bricks are collected,
-                 * launch self-heals now on all up subvols.
-                 */
-                for (i = 0; i < priv->child_count; i++)
-                        if (priv->child_up[i])
-                                afr_selfheal_childup (this, i);
-        } else if (have_heard_from_all && call_psh && priv->shd.iamshd) {
-                /*
-                 * Already heard from everyone. Just launch heal on now up
-                 * subvolume.
+        if ((!had_heard_from_all) || call_psh) {
+                /* Launch self-heal on all local subvolumes if:
+                 * a) We have_heard_from_all for the first time
+                 * b) Already heard from everyone, but we now got a child-up
+                 *    event.
                  */
-                 afr_selfheal_childup (this, up_child);
-	}
+                if (have_heard_from_all && priv->shd.iamshd) {
+                        for (i = 0; i < priv->child_count; i++)
+                                if (priv->child_up[i])
+                                        afr_selfheal_childup (this, i);
+                }
+        }
 out:
         return ret;
 }
-- 
1.7.1