3604df
From 987ad7c9be7ae9f56ac6834819d3b2ea081d6433 Mon Sep 17 00:00:00 2001
3604df
From: Ashish Pandey <aspandey@redhat.com>
3604df
Date: Wed, 11 Jan 2017 17:19:30 +0530
3604df
Subject: [PATCH 274/275] cluster/ec: Do not start heal on good file while IO
3604df
 is going on
3604df
3604df
Problem:
3604df
Write on a file has been slowed down significantly after
3604df
http://review.gluster.org/#/c/13733/
3604df
3604df
RC : When update fop starts on a file, it sets dirty flag at
3604df
the start and remove it at the end which make an index entry
3604df
in indices/xattrop. During IO, SHD scans this and finds out
3604df
an index and starts heal even if all the fragments are healthy
3604df
and up to date. This heal takes inodelk for different types of
3604df
heal. If the IO goes on for a long time, this will happen every 60 seconds.
3604df
Due to this extra, unnecessary locking, IO gets slowed down.
3604df
3604df
Solution:
3604df
Before starting any type of heal, check if the file needs heal or not.
3604df
3604df
>Change-Id: Ib9519a43e7e4b2565d3f3153f9ca0fb92174fe51
3604df
>BUG: 1409191
3604df
>Signed-off-by: Ashish Pandey <aspandey@redhat.com>
3604df
>Reviewed-on: http://review.gluster.org/16377
3604df
>NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
3604df
>CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
3604df
>Smoke: Gluster Build System <jenkins@build.gluster.org>
3604df
>Reviewed-by: Pranith Kumar Karampuri <pkarampu@redhat.com>
3604df
>Reviewed-by: Xavier Hernandez <xhernandez@datalab.es>
3604df
>Signed-off-by: Ashish Pandey <aspandey@redhat.com>
3604df
3604df
Change-Id: Ib9519a43e7e4b2565d3f3153f9ca0fb92174fe51
3604df
BUG: 1408639
3604df
Signed-off-by: Ashish Pandey <aspandey@redhat.com>
3604df
Reviewed-on: https://code.engineering.redhat.com/gerrit/95870
3604df
Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
3604df
---
3604df
 xlators/cluster/ec/src/ec-common.h |  4 ++++
3604df
 xlators/cluster/ec/src/ec-heal.c   | 41 ++++++++++++++++++++++++++++++++++----
3604df
 2 files changed, 41 insertions(+), 4 deletions(-)
3604df
3604df
diff --git a/xlators/cluster/ec/src/ec-common.h b/xlators/cluster/ec/src/ec-common.h
3604df
index e3fc3ce..c532e0e 100644
3604df
--- a/xlators/cluster/ec/src/ec-common.h
3604df
+++ b/xlators/cluster/ec/src/ec-common.h
3604df
@@ -119,6 +119,10 @@ gf_boolean_t ec_is_recoverable_error (int32_t op_errno);
3604df
 void ec_handle_healers_done (ec_fop_data_t *fop);
3604df
 
3604df
 int32_t
3604df
+ec_heal_inspect (call_frame_t *frame, ec_t *ec,
3604df
+                 inode_t *inode, unsigned char *locked_on,
3604df
+                 gf_boolean_t *need_heal);
3604df
+int32_t
3604df
 ec_get_heal_info (xlator_t *this, loc_t *loc, dict_t **dict);
3604df
 
3604df
 #endif /* __EC_COMMON_H__ */
3604df
diff --git a/xlators/cluster/ec/src/ec-heal.c b/xlators/cluster/ec/src/ec-heal.c
3604df
index 8503b6d..81704e5 100644
3604df
--- a/xlators/cluster/ec/src/ec-heal.c
3604df
+++ b/xlators/cluster/ec/src/ec-heal.c
3604df
@@ -2332,6 +2332,8 @@ ec_heal_do (xlator_t *this, void *data, loc_t *loc, int32_t partial)
3604df
         intptr_t      bad            = 0;
3604df
         ec_fop_data_t *fop           = data;
3604df
         gf_boolean_t  blocking       = _gf_false;
3604df
+        gf_boolean_t  need_heal      = _gf_false;
3604df
+        unsigned char *up_subvols    = NULL;
3604df
 
3604df
         ec = this->private;
3604df
 
3604df
@@ -2353,6 +2355,19 @@ ec_heal_do (xlator_t *this, void *data, loc_t *loc, int32_t partial)
3604df
         frame->root->pid = GF_CLIENT_PID_SELF_HEALD;
3604df
         participants = alloca0(ec->nodes);
3604df
         ec_mask_to_char_array (ec->xl_up, participants, ec->nodes);
3604df
+
3604df
+        up_subvols = alloca0(ec->nodes);
3604df
+        ec_mask_to_char_array (ec->xl_up, up_subvols, ec->nodes);
3604df
+
3604df
+        ec_heal_inspect (frame, ec, loc->inode, up_subvols,
3604df
+                         &need_heal);
3604df
+        if (!need_heal) {
3604df
+                gf_msg (ec->xl->name, GF_LOG_DEBUG, 0,
3604df
+                        EC_MSG_HEAL_FAIL, "Heal is not required for : %s ",
3604df
+                        uuid_utoa(loc->gfid));
3604df
+                goto out;
3604df
+        }
3604df
+
3604df
         if (loc->name && strlen (loc->name)) {
3604df
                 ret = ec_heal_name (frame, ec, loc->parent, (char *)loc->name,
3604df
                                     participants);
3604df
@@ -2401,7 +2416,7 @@ ec_heal_do (xlator_t *this, void *data, loc_t *loc, int32_t partial)
3604df
                 op_errno = -ret;
3604df
         }
3604df
 
3604df
-
3604df
+out:
3604df
         if (fop->cbks.heal) {
3604df
                 fop->cbks.heal (fop->req_frame, fop, fop->xl, op_ret,
3604df
                                 op_errno, ec_char_array_to_mask (participants,
3604df
@@ -2665,7 +2680,8 @@ out:
3604df
 }
3604df
 
3604df
 int32_t
3604df
-ec_need_heal (ec_t *ec, default_args_cbk_t *replies, gf_boolean_t *need_heal)
3604df
+ec_need_heal (ec_t *ec, default_args_cbk_t *replies,
3604df
+              gf_boolean_t *need_heal, int32_t lock_count)
3604df
 {
3604df
         uint64_t           *dirty         = NULL;
3604df
         unsigned char      *sources       = NULL;
3604df
@@ -2691,7 +2707,9 @@ ec_need_heal (ec_t *ec, default_args_cbk_t *replies, gf_boolean_t *need_heal)
3604df
                 goto out;
3604df
         }
3604df
         source_count = EC_COUNT (sources, ec->nodes);
3604df
-        if (source_count != ec->nodes) {
3604df
+        if (source_count == ec->nodes && lock_count > 0) {
3604df
+                *need_heal = _gf_false;
3604df
+        } else {
3604df
                 *need_heal = _gf_true;
3604df
         }
3604df
         ret = source_count;
3604df
@@ -2705,12 +2723,14 @@ ec_heal_inspect (call_frame_t *frame, ec_t *ec,
3604df
                  gf_boolean_t *need_heal)
3604df
 {
3604df
         loc_t              loc           = {0};
3604df
+        int                i             = 0;
3604df
         int                ret           = 0;
3604df
         dict_t             *xdata        = NULL;
3604df
         uint64_t           zero_array[2] = {0};
3604df
         uint64_t           zero_value    = 0;
3604df
         unsigned char      *output       = NULL;
3604df
         default_args_cbk_t *replies      = NULL;
3604df
+        int32_t            lock_count    = 0;
3604df
 
3604df
         EC_REPLIES_ALLOC (replies, ec->nodes);
3604df
         output = alloca0 (ec->nodes);
3604df
@@ -2720,6 +2740,8 @@ ec_heal_inspect (call_frame_t *frame, ec_t *ec,
3604df
 
3604df
         xdata = dict_new ();
3604df
         if (!xdata ||
3604df
+            dict_set_str(xdata, GLUSTERFS_INODELK_DOM_COUNT,
3604df
+                         ec->xl->name) ||
3604df
             dict_set_static_bin (xdata, EC_XATTR_VERSION, zero_array,
3604df
                                  sizeof (zero_array)) ||
3604df
             dict_set_static_bin (xdata, EC_XATTR_DIRTY, zero_array,
3604df
@@ -2731,12 +2753,23 @@ ec_heal_inspect (call_frame_t *frame, ec_t *ec,
3604df
         }
3604df
         ret = cluster_lookup (ec->xl_list, locked_on, ec->nodes, replies,
3604df
                               output, frame, ec->xl, &loc, xdata);
3604df
+
3604df
         if (ret != ec->nodes) {
3604df
                 ret = ec->nodes;
3604df
                 *need_heal = _gf_true;
3604df
                 goto out;
3604df
         }
3604df
-        ret = ec_need_heal (ec, replies, need_heal);
3604df
+
3604df
+        for (i = 0; i < ec->nodes; i++) {
3604df
+                if (!output[i] || !replies[i].xdata) {
3604df
+                        continue;
3604df
+                }
3604df
+                if ((dict_get_int32 (replies[i].xdata, GLUSTERFS_INODELK_COUNT,
3604df
+                                     &lock_count) == 0) && lock_count > 0) {
3604df
+                        break;
3604df
+                }
3604df
+        }
3604df
+        ret = ec_need_heal (ec, replies, need_heal, lock_count);
3604df
 
3604df
 out:
3604df
         cluster_replies_wipe (replies, ec->nodes);
3604df
-- 
3604df
2.9.3
3604df