Tree - rpms/glusterfs - CentOS Git server

rpms / glusterfs

Blame SOURCES/0101-cluster-ec-Prevent-self-heal-to-work-after-PARENT_DO.patch

Blob History Raw

		d1681e	`From a4f3087ecbd1979525add83a149acaf2443d8e59 Mon Sep 17 00:00:00 2001`
		d1681e	`From: Xavier Hernandez <jahernan@redhat.com>`
		d1681e	`Date: Wed, 22 Nov 2017 11:10:32 +0100`
		d1681e	`Subject: [PATCH 101/128] cluster/ec: Prevent self-heal to work after`
		d1681e	`PARENT_DOWN`
		d1681e
		d1681e	`When the volume is being stopped, PARENT_DOWN event is received.`
		d1681e	`This instructs EC to wait until all pending operations are completed`
		d1681e	`before declaring itself down. However, heal operations are ignored`
		d1681e	`and allowed to continue even after EC has declared itself down.`
		d1681e
		d1681e	`This may cause unexpected results and crashes.`
		d1681e
		d1681e	`To solve this, heal operations are now treated exactly like any`
		d1681e	`other operation, and EC won't propagate PARENT_DOWN until all`
		d1681e	`operations, including healing, are complete. To avoid long delays`
		d1681e	`if this happens in the middle of a big heal, a check has been`
		d1681e	`added to abort the current heal if a shutdown is detected.`
		d1681e
		d1681e	`>Change-Id: I26645e236ebd115eb22c7ad4972461111a2d2034`
		d1681e	`>BUG: 1515266`
		d1681e	`>Signed-off-by: Xavier Hernandez <jahernan@redhat.com>`
		d1681e	`Upstream Patch: https://review.gluster.org/#/c/18840/`
		d1681e
		d1681e	`BUG: 1505570`
		d1681e	`Change-Id: I26645e236ebd115eb22c7ad4972461111a2d2034`
		d1681e	`Signed-off-by: Sunil Kumar Acharya <sheggodu@redhat.com>`
		d1681e	`Reviewed-on: https://code.engineering.redhat.com/gerrit/125199`
		d1681e	`Tested-by: RHGS Build Bot <nigelb@redhat.com>`
		d1681e	`---`
		d1681e	`xlators/cluster/ec/src/ec-data.c | 21 ++------------`
		d1681e	`xlators/cluster/ec/src/ec-heal.c | 59 +++++++++++++++++++++++++++++++++-------`
		d1681e	`2 files changed, 52 insertions(+), 28 deletions(-)`
		d1681e
		d1681e	`diff --git a/xlators/cluster/ec/src/ec-data.c b/xlators/cluster/ec/src/ec-data.c`
		d1681e	`index 28bf988..54c708a 100644`
		d1681e	`--- a/xlators/cluster/ec/src/ec-data.c`
		d1681e	`+++ b/xlators/cluster/ec/src/ec-data.c`
		d1681e	`@@ -103,19 +103,6 @@ void ec_cbk_data_destroy(ec_cbk_data_t * cbk)`
		d1681e	`mem_put(cbk);`
		d1681e	`}`
		d1681e
		d1681e	`-/* PARENT_DOWN will be notified to children only after these fops are complete`
		d1681e	`- * when graph switch happens. We do not want graph switch to be waiting on`
		d1681e	`- * heal to complete as healing big file/directory could take a while. Which`
		d1681e	`- * will lead to hang on the mount.`
		d1681e	`- */`
		d1681e	`-static gf_boolean_t`
		d1681e	`-ec_needs_graceful_completion (ec_fop_data_t *fop)`
		d1681e	`-{`
		d1681e	`- if ((fop->id != EC_FOP_HEAL) && (fop->id != EC_FOP_FHEAL))`
		d1681e	`- return _gf_true;`
		d1681e	`- return _gf_false;`
		d1681e	`-}`
		d1681e	`-`
		d1681e	`ec_fop_data_t * ec_fop_data_allocate(call_frame_t * frame, xlator_t * this,`
		d1681e	`int32_t id, uint32_t flags,`
		d1681e	`uintptr_t target, int32_t minimum,`
		d1681e	`@@ -202,13 +189,11 @@ ec_fop_data_t * ec_fop_data_allocate(call_frame_t * frame, xlator_t * this,`
		d1681e	`fop->parent = parent;`
		d1681e	`}`
		d1681e
		d1681e	`- if (ec_needs_graceful_completion (fop)) {`
		d1681e	`- LOCK(&ec->lock);`
		d1681e	`+ LOCK(&ec->lock);`
		d1681e
		d1681e	`- list_add_tail(&fop->pending_list, &ec->pending_fops);`
		d1681e	`+ list_add_tail(&fop->pending_list, &ec->pending_fops);`
		d1681e
		d1681e	`- UNLOCK(&ec->lock);`
		d1681e	`- }`
		d1681e	`+ UNLOCK(&ec->lock);`
		d1681e
		d1681e	`return fop;`
		d1681e	`}`
		d1681e	`diff --git a/xlators/cluster/ec/src/ec-heal.c b/xlators/cluster/ec/src/ec-heal.c`
		d1681e	`index fd8c902..b8518d6 100644`
		d1681e	`--- a/xlators/cluster/ec/src/ec-heal.c`
		d1681e	`+++ b/xlators/cluster/ec/src/ec-heal.c`
		d1681e	`@@ -1418,6 +1418,12 @@ ec_name_heal_handler (xlator_t *subvol, gf_dirent_t *entry, loc_t *parent,`
		d1681e	`int i = 0;`
		d1681e	`int ret = 0;`
		d1681e
		d1681e	`+ if (ec->shutdown) {`
		d1681e	`+ gf_msg_debug(this->name, 0, "Cancelling directory heal "`
		d1681e	`+ "because EC is stopping.");`
		d1681e	`+ return -ENOTCONN;`
		d1681e	`+ }`
		d1681e	`+`
		d1681e	`memcpy (name_on, name_data->participants, ec->nodes);`
		d1681e	`ret = ec_heal_name (name_data->frame, ec, parent->inode,`
		d1681e	`entry->d_name, name_on);`
		d1681e	`@@ -1439,6 +1445,7 @@ ec_heal_names (call_frame_t *frame, ec_t *ec, inode_t *inode,`
		d1681e	`int j = 0;`
		d1681e	`loc_t loc = {0};`
		d1681e	`struct ec_name_data name_data = {0};`
		d1681e	`+ int ret = 0;`
		d1681e
		d1681e	`loc.inode = inode_ref (inode);`
		d1681e	`gf_uuid_copy (loc.gfid, inode->gfid);`
		d1681e	`@@ -1449,18 +1456,23 @@ ec_heal_names (call_frame_t frame, ec_t ec, inode_t *inode,`
		d1681e	`for (i = 0; i < ec->nodes; i++) {`
		d1681e	`if (!participants[i])`
		d1681e	`continue;`
		d1681e	`- syncop_dir_scan (ec->xl_list[i], &loc,`
		d1681e	`- GF_CLIENT_PID_SELF_HEALD, &name_data,`
		d1681e	`- ec_name_heal_handler);`
		d1681e	`+ ret = syncop_dir_scan (ec->xl_list[i], &loc,`
		d1681e	`+ GF_CLIENT_PID_SELF_HEALD, &name_data,`
		d1681e	`+ ec_name_heal_handler);`
		d1681e	`+ if (ret < 0) {`
		d1681e	`+ break;`
		d1681e	`+ }`
		d1681e	`for (j = 0; j < ec->nodes; j++)`
		d1681e	`if (name_data.failed_on[j])`
		d1681e	`participants[j] = 0;`
		d1681e
		d1681e	`- if (EC_COUNT (participants, ec->nodes) <= ec->fragments)`
		d1681e	`- return -ENOTCONN;`
		d1681e	`+ if (EC_COUNT (participants, ec->nodes) <= ec->fragments) {`
		d1681e	`+ ret = -ENOTCONN;`
		d1681e	`+ break;`
		d1681e	`+ }`
		d1681e	`}`
		d1681e	`loc_wipe (&loc);`
		d1681e	`- return 0;`
		d1681e	`+ return ret;`
		d1681e	`}`
		d1681e
		d1681e	`int`
		d1681e	`@@ -1999,6 +2011,17 @@ ec_rebuild_data (call_frame_t *frame, ec_t *ec, fd_t *fd, uint64_t size,`
		d1681e
		d1681e	`for (heal->offset = 0; (heal->offset < size) && !heal->done;`
		d1681e	`heal->offset += heal->size) {`
		d1681e	`+ /* We immediately abort any heal if a shutdown request has been`
		d1681e	`+ * received to avoid delays. The healing of this file will be`
		d1681e	`+ * restarted by another SHD or other client that accesses the`
		d1681e	`+ * file. */`
		d1681e	`+ if (ec->shutdown) {`
		d1681e	`+ gf_msg_debug(ec->xl->name, 0, "Cancelling heal because "`
		d1681e	`+ "EC is stopping.");`
		d1681e	`+ ret = -ENOTCONN;`
		d1681e	`+ break;`
		d1681e	`+ }`
		d1681e	`+`
		d1681e	`gf_msg_debug (ec->xl->name, 0, "%s: sources: %d, sinks: "`
		d1681e	`"%d, offset: %"PRIu64" bsize: %"PRIu64,`
		d1681e	`uuid_utoa (fd->inode->gfid),`
		d1681e	`@@ -2595,16 +2618,32 @@ ec_handle_healers_done (ec_fop_data_t *fop)`
		d1681e	`return;`
		d1681e
		d1681e	`LOCK (&ec->lock);`
		d1681e	`- {`
		d1681e	`- list_del_init (&fop->healer);`
		d1681e	`+`
		d1681e	`+ list_del_init (&fop->healer);`
		d1681e	`+`
		d1681e	`+ do {`
		d1681e	`ec->healers--;`
		d1681e	`heal_fop = __ec_dequeue_heals (ec);`
		d1681e	`- }`
		d1681e	`+`
		d1681e	`+ if ((heal_fop != NULL) && ec->shutdown) {`
		d1681e	`+ /* This will prevent ec_handle_healers_done() to be`
		d1681e	`+ * called recursively. That would be problematic if`
		d1681e	`+ * the queue is too big. */`
		d1681e	`+ list_del_init(&heal_fop->healer);`
		d1681e	`+`
		d1681e	`+ UNLOCK(&ec->lock);`
		d1681e	`+`
		d1681e	`+ ec_fop_set_error(fop, ENOTCONN);`
		d1681e	`+ ec_heal_fail(ec, heal_fop);`
		d1681e	`+`
		d1681e	`+ LOCK(&ec->lock);`
		d1681e	`+ }`
		d1681e	`+ } while ((heal_fop != NULL) && ec->shutdown);`
		d1681e	`+`
		d1681e	`UNLOCK (&ec->lock);`
		d1681e
		d1681e	`if (heal_fop)`
		d1681e	`ec_launch_heal (ec, heal_fop);`
		d1681e	`-`
		d1681e	`}`
		d1681e
		d1681e	`void`
		d1681e	`--`
		d1681e	`1.8.3.1`
		d1681e

rpms / glusterfs

Source Code

Blame SOURCES/0101-cluster-ec-Prevent-self-heal-to-work-after-PARENT_DO.patch