e7a346
From a4f3087ecbd1979525add83a149acaf2443d8e59 Mon Sep 17 00:00:00 2001
e7a346
From: Xavier Hernandez <jahernan@redhat.com>
e7a346
Date: Wed, 22 Nov 2017 11:10:32 +0100
e7a346
Subject: [PATCH 101/128] cluster/ec: Prevent self-heal to work after
e7a346
 PARENT_DOWN
e7a346
e7a346
When the volume is being stopped, a PARENT_DOWN event is received.
e7a346
This instructs EC to wait until all pending operations are completed
e7a346
before declaring itself down. However, heal operations are ignored
e7a346
and allowed to continue even after having said it was down.
e7a346
e7a346
This may cause unexpected results and crashes.
e7a346
e7a346
To solve this, heal operations are treated exactly like any
e7a346
other operation and EC won't propagate PARENT_DOWN until all
e7a346
operations, including healing, are complete. To avoid big delays
e7a346
if this happens in the middle of a big heal, a check has been
e7a346
added to quit the current heal if shutdown is detected.
e7a346
e7a346
>Change-Id: I26645e236ebd115eb22c7ad4972461111a2d2034
e7a346
>BUG: 1515266
e7a346
>Signed-off-by: Xavier Hernandez <jahernan@redhat.com>
e7a346
Upstream Patch: https://review.gluster.org/#/c/18840/
e7a346
e7a346
BUG: 1505570
e7a346
Change-Id: I26645e236ebd115eb22c7ad4972461111a2d2034
e7a346
Signed-off-by: Sunil Kumar Acharya <sheggodu@redhat.com>
e7a346
Reviewed-on: https://code.engineering.redhat.com/gerrit/125199
e7a346
Tested-by: RHGS Build Bot <nigelb@redhat.com>
e7a346
---
e7a346
 xlators/cluster/ec/src/ec-data.c | 21 ++------------
e7a346
 xlators/cluster/ec/src/ec-heal.c | 59 +++++++++++++++++++++++++++++++++-------
e7a346
 2 files changed, 52 insertions(+), 28 deletions(-)
e7a346
e7a346
diff --git a/xlators/cluster/ec/src/ec-data.c b/xlators/cluster/ec/src/ec-data.c
e7a346
index 28bf988..54c708a 100644
e7a346
--- a/xlators/cluster/ec/src/ec-data.c
e7a346
+++ b/xlators/cluster/ec/src/ec-data.c
e7a346
@@ -103,19 +103,6 @@ void ec_cbk_data_destroy(ec_cbk_data_t * cbk)
e7a346
     mem_put(cbk);
e7a346
 }
e7a346
 
e7a346
-/* PARENT_DOWN will be notified to children only after these fops are complete
e7a346
- * when graph switch happens.  We do not want graph switch to be waiting on
e7a346
- * heal to complete as healing big file/directory could take a while. Which
e7a346
- * will lead to hang on the mount.
e7a346
- */
e7a346
-static gf_boolean_t
e7a346
-ec_needs_graceful_completion (ec_fop_data_t *fop)
e7a346
-{
e7a346
-        if ((fop->id != EC_FOP_HEAL) && (fop->id != EC_FOP_FHEAL))
e7a346
-                return _gf_true;
e7a346
-        return _gf_false;
e7a346
-}
e7a346
-
e7a346
 ec_fop_data_t * ec_fop_data_allocate(call_frame_t * frame, xlator_t * this,
e7a346
                                      int32_t id, uint32_t flags,
e7a346
                                      uintptr_t target, int32_t minimum,
e7a346
@@ -202,13 +189,11 @@ ec_fop_data_t * ec_fop_data_allocate(call_frame_t * frame, xlator_t * this,
e7a346
         fop->parent = parent;
e7a346
     }
e7a346
 
e7a346
-    if (ec_needs_graceful_completion (fop)) {
e7a346
-            LOCK(&ec->lock);
e7a346
+    LOCK(&ec->lock);
e7a346
 
e7a346
-            list_add_tail(&fop->pending_list, &ec->pending_fops);
e7a346
+    list_add_tail(&fop->pending_list, &ec->pending_fops);
e7a346
 
e7a346
-            UNLOCK(&ec->lock);
e7a346
-    }
e7a346
+    UNLOCK(&ec->lock);
e7a346
 
e7a346
     return fop;
e7a346
 }
e7a346
diff --git a/xlators/cluster/ec/src/ec-heal.c b/xlators/cluster/ec/src/ec-heal.c
e7a346
index fd8c902..b8518d6 100644
e7a346
--- a/xlators/cluster/ec/src/ec-heal.c
e7a346
+++ b/xlators/cluster/ec/src/ec-heal.c
e7a346
@@ -1418,6 +1418,12 @@ ec_name_heal_handler (xlator_t *subvol, gf_dirent_t *entry, loc_t *parent,
e7a346
         int                 i          = 0;
e7a346
         int                 ret        = 0;
e7a346
 
e7a346
+        if (ec->shutdown) {
e7a346
+                gf_msg_debug(this->name, 0, "Cancelling directory heal "
e7a346
+                                            "because EC is stopping.");
e7a346
+                return -ENOTCONN;
e7a346
+        }
e7a346
+
e7a346
         memcpy (name_on, name_data->participants, ec->nodes);
e7a346
         ret = ec_heal_name (name_data->frame, ec, parent->inode,
e7a346
                             entry->d_name, name_on);
e7a346
@@ -1439,6 +1445,7 @@ ec_heal_names (call_frame_t *frame, ec_t *ec, inode_t *inode,
e7a346
         int j = 0;
e7a346
         loc_t loc = {0};
e7a346
         struct ec_name_data name_data = {0};
e7a346
+        int ret = 0;
e7a346
 
e7a346
         loc.inode = inode_ref (inode);
e7a346
         gf_uuid_copy (loc.gfid, inode->gfid);
e7a346
@@ -1449,18 +1456,23 @@ ec_heal_names (call_frame_t *frame, ec_t *ec, inode_t *inode,
e7a346
         for (i = 0; i < ec->nodes; i++) {
e7a346
                 if (!participants[i])
e7a346
                         continue;
e7a346
-                syncop_dir_scan (ec->xl_list[i], &loc,
e7a346
-                                GF_CLIENT_PID_SELF_HEALD, &name_data,
e7a346
-                                ec_name_heal_handler);
e7a346
+                ret = syncop_dir_scan (ec->xl_list[i], &loc,
e7a346
+                                       GF_CLIENT_PID_SELF_HEALD, &name_data,
e7a346
+                                       ec_name_heal_handler);
e7a346
+                if (ret < 0) {
e7a346
+                        break;
e7a346
+                }
e7a346
                 for (j = 0; j < ec->nodes; j++)
e7a346
                         if (name_data.failed_on[j])
e7a346
                                 participants[j] = 0;
e7a346
 
e7a346
-                if (EC_COUNT (participants, ec->nodes) <= ec->fragments)
e7a346
-                        return -ENOTCONN;
e7a346
+                if (EC_COUNT (participants, ec->nodes) <= ec->fragments) {
e7a346
+                        ret = -ENOTCONN;
e7a346
+                        break;
e7a346
+                }
e7a346
         }
e7a346
         loc_wipe (&loc);
e7a346
-        return 0;
e7a346
+        return ret;
e7a346
 }
e7a346
 
e7a346
 int
e7a346
@@ -1999,6 +2011,17 @@ ec_rebuild_data (call_frame_t *frame, ec_t *ec, fd_t *fd, uint64_t size,
e7a346
 
e7a346
         for (heal->offset = 0; (heal->offset < size) && !heal->done;
e7a346
                                                    heal->offset += heal->size) {
e7a346
+                /* We immediately abort any heal if a shutdown request has been
e7a346
+                 * received to avoid delays. The healing of this file will be
e7a346
+                 * restarted by another SHD or other client that accesses the
e7a346
+                 * file. */
e7a346
+                if (ec->shutdown) {
e7a346
+                        gf_msg_debug(ec->xl->name, 0, "Cancelling heal because "
e7a346
+                                                      "EC is stopping.");
e7a346
+                        ret = -ENOTCONN;
e7a346
+                        break;
e7a346
+                }
e7a346
+
e7a346
                 gf_msg_debug (ec->xl->name, 0, "%s: sources: %d, sinks: "
e7a346
                         "%d, offset: %"PRIu64" bsize: %"PRIu64,
e7a346
                         uuid_utoa (fd->inode->gfid),
e7a346
@@ -2595,16 +2618,32 @@ ec_handle_healers_done (ec_fop_data_t *fop)
e7a346
                 return;
e7a346
 
e7a346
         LOCK (&ec->lock);
e7a346
-        {
e7a346
-                list_del_init (&fop->healer);
e7a346
+
e7a346
+        list_del_init (&fop->healer);
e7a346
+
e7a346
+        do {
e7a346
                 ec->healers--;
e7a346
                 heal_fop = __ec_dequeue_heals (ec);
e7a346
-        }
e7a346
+
e7a346
+                if ((heal_fop != NULL) && ec->shutdown) {
e7a346
+                        /* This will prevent ec_handle_healers_done() to be
e7a346
+                         * called recursively. That would be problematic if
e7a346
+                         * the queue is too big. */
e7a346
+                        list_del_init(&heal_fop->healer);
e7a346
+
e7a346
+                        UNLOCK(&ec->lock);
e7a346
+
e7a346
+                        ec_fop_set_error(fop, ENOTCONN);
e7a346
+                        ec_heal_fail(ec, heal_fop);
e7a346
+
e7a346
+                        LOCK(&ec->lock);
e7a346
+                }
e7a346
+        } while ((heal_fop != NULL) && ec->shutdown);
e7a346
+
e7a346
         UNLOCK (&ec->lock);
e7a346
 
e7a346
         if (heal_fop)
e7a346
                 ec_launch_heal (ec, heal_fop);
e7a346
-
e7a346
 }
e7a346
 
e7a346
 void
e7a346
-- 
e7a346
1.8.3.1
e7a346