Tree - rpms/glusterfs - CentOS Git server

rpms / glusterfs

Blame SOURCES/0236-cluster-ec-Prevent-data-corruptions.patch

Blob History Raw

		cb8e9e	`From a06d455f2f3ad30e7aab2a6ac9594281dd1386c7 Mon Sep 17 00:00:00 2001`
		cb8e9e	`From: Pranith Kumar K <pkarampu@redhat.com>`
		cb8e9e	`Date: Wed, 8 Jul 2015 17:52:11 +0530`
		cb8e9e	`Subject: [PATCH 236/244] cluster/ec: Prevent data corruptions`
		cb8e9e
		cb8e9e	`Backport of http://review.gluster.org/11580`
		cb8e9e	`Backport of http://review.gluster.com/11640`
		cb8e9e
		cb8e9e	`- Don't read from bricks that are healing`
		cb8e9e	`- On lock reuse preserve 'healing' bits`
		cb8e9e	`- Don't set ctx->size outside locks in healing code`
		cb8e9e	`- Allow xattrop internal fops also on the fop->mask.`
		cb8e9e
		cb8e9e	`Change-Id: I35503039e4723cf7f33d6797f0ba90dd0aca130b`
		cb8e9e	`BUG: 1230513`
		cb8e9e	`Signed-off-by: Pranith Kumar K <pkarampu@redhat.com>`
		cb8e9e	`Reviewed-on: https://code.engineering.redhat.com/gerrit/52663`
		cb8e9e	`---`
		cb8e9e	`xlators/cluster/ec/src/ec-common.c \| 25 +++++++++++++++++++++----`
		cb8e9e	`xlators/cluster/ec/src/ec-data.h \| 1 +`
		cb8e9e	`xlators/cluster/ec/src/ec-heal.c \| 22 +++++++++++-----------`
		cb8e9e	`xlators/cluster/ec/src/ec.c \| 2 +-`
		cb8e9e	`4 files changed, 34 insertions(+), 16 deletions(-)`
		cb8e9e
		cb8e9e	`diff --git a/xlators/cluster/ec/src/ec-common.c b/xlators/cluster/ec/src/ec-common.c`
		cb8e9e	`index 439773e..b2d0348 100644`
		cb8e9e	`--- a/xlators/cluster/ec/src/ec-common.c`
		cb8e9e	`+++ b/xlators/cluster/ec/src/ec-common.c`
		cb8e9e	`@@ -401,6 +401,18 @@ ec_must_wind (ec_fop_data_t *fop)`
		cb8e9e	`return _gf_false;`
		cb8e9e	`}`
		cb8e9e
		cb8e9e	`+static gf_boolean_t`
		cb8e9e	`+ec_internal_op (ec_fop_data_t *fop)`
		cb8e9e	`+{`
		cb8e9e	`+ if (ec_must_wind (fop))`
		cb8e9e	`+ return _gf_true;`
		cb8e9e	`+ if (fop->id == GF_FOP_XATTROP)`
		cb8e9e	`+ return _gf_true;`
		cb8e9e	`+ if (fop->id == GF_FOP_FXATTROP)`
		cb8e9e	`+ return _gf_true;`
		cb8e9e	`+ return _gf_false;`
		cb8e9e	`+}`
		cb8e9e	`+`
		cb8e9e	`int32_t ec_child_select(ec_fop_data_t * fop)`
		cb8e9e	`{`
		cb8e9e	`ec_t * ec = fop->xl->private;`
		cb8e9e	`@@ -413,8 +425,9 @@ int32_t ec_child_select(ec_fop_data_t * fop)`
		cb8e9e	`/* Wind the fop on same subvols as parent for any internal extra fops like`
		cb8e9e	`* head/tail read in case of writev fop. Unlocks shouldn't do this because`
		cb8e9e	`* unlock should go on all subvols where lock is performed*/`
		cb8e9e	`- if (fop->parent && !ec_must_wind (fop))`
		cb8e9e	`- fop->mask &= fop->parent->mask;`
		cb8e9e	`+ if (fop->parent && !ec_internal_op (fop)) {`
		cb8e9e	`+ fop->mask &= (fop->parent->mask & ~fop->parent->healing);`
		cb8e9e	`+ }`
		cb8e9e
		cb8e9e	`mask = ec->xl_up;`
		cb8e9e	`if (fop->parent == NULL)`
		cb8e9e	`@@ -972,6 +985,7 @@ out:`
		cb8e9e	`parent->mask &= fop->good;`
		cb8e9e
		cb8e9e	`/As of now only data healing marks bricks as healing/`
		cb8e9e	`+ lock->healing \|= fop->healing;`
		cb8e9e	`if (ec_is_data_fop (parent->id)) {`
		cb8e9e	`parent->healing \|= fop->healing;`
		cb8e9e	`}`
		cb8e9e	`@@ -996,9 +1010,13 @@ void ec_get_size_version(ec_lock_link_t *link)`
		cb8e9e
		cb8e9e	`lock = link->lock;`
		cb8e9e	`ctx = lock->ctx;`
		cb8e9e	`+ fop = link->fop;`
		cb8e9e
		cb8e9e	`/* If ec metadata has already been retrieved, do not try again. */`
		cb8e9e	`if (ctx->have_info) {`
		cb8e9e	`+ if (ec_is_data_fop (fop->id)) {`
		cb8e9e	`+ fop->healing \|= lock->healing;`
		cb8e9e	`+ }`
		cb8e9e	`return;`
		cb8e9e	`}`
		cb8e9e
		cb8e9e	`@@ -1008,8 +1026,6 @@ void ec_get_size_version(ec_lock_link_t *link)`
		cb8e9e	`return;`
		cb8e9e	`}`
		cb8e9e
		cb8e9e	`- fop = link->fop;`
		cb8e9e	`-`
		cb8e9e	`uid = fop->frame->root->uid;`
		cb8e9e	`gid = fop->frame->root->gid;`
		cb8e9e
		cb8e9e	`@@ -1274,6 +1290,7 @@ int32_t ec_locked(call_frame_t frame, void cookie, xlator_t *this,`
		cb8e9e	`link = fop->data;`
		cb8e9e	`lock = link->lock;`
		cb8e9e	`lock->mask = lock->good_mask = fop->good;`
		cb8e9e	`+ lock->healing = 0;`
		cb8e9e
		cb8e9e	`ec_lock_acquired(link);`
		cb8e9e	`ec_lock(fop->parent);`
		cb8e9e	`diff --git a/xlators/cluster/ec/src/ec-data.h b/xlators/cluster/ec/src/ec-data.h`
		cb8e9e	`index 135ccdf..ec470e9 100644`
		cb8e9e	`--- a/xlators/cluster/ec/src/ec-data.h`
		cb8e9e	`+++ b/xlators/cluster/ec/src/ec-data.h`
		cb8e9e	`@@ -146,6 +146,7 @@ struct _ec_lock`
		cb8e9e	`the next unlock/lock cycle. */`
		cb8e9e	`uintptr_t mask;`
		cb8e9e	`uintptr_t good_mask;`
		cb8e9e	`+ uintptr_t healing;`
		cb8e9e	`int32_t refs;`
		cb8e9e	`int32_t refs_frozen;`
		cb8e9e	`int32_t inserted;`
		cb8e9e	`diff --git a/xlators/cluster/ec/src/ec-heal.c b/xlators/cluster/ec/src/ec-heal.c`
		cb8e9e	`index 6f82203..a7c97a5 100644`
		cb8e9e	`--- a/xlators/cluster/ec/src/ec-heal.c`
		cb8e9e	`+++ b/xlators/cluster/ec/src/ec-heal.c`
		cb8e9e	`@@ -256,10 +256,13 @@ int32_t ec_heal_readv_cbk(call_frame_t * frame, void * cookie, xlator_t * this,`
		cb8e9e	`}`
		cb8e9e	`else`
		cb8e9e	`{`
		cb8e9e	`- gf_msg_debug (fop->xl->name, 0, "%s: read failed %s, failing "`
		cb8e9e	`- "to heal block at %"PRIu64,`
		cb8e9e	`- uuid_utoa (heal->fd->inode->gfid), strerror (op_errno),`
		cb8e9e	`- heal->offset);`
		cb8e9e	`+ if (op_ret < 0) {`
		cb8e9e	`+ gf_msg_debug (fop->xl->name, 0, "%s: read failed %s, failing "`
		cb8e9e	`+ "to heal block at %"PRIu64,`
		cb8e9e	`+ uuid_utoa (heal->fd->inode->gfid), strerror (op_errno),`
		cb8e9e	`+ heal->offset);`
		cb8e9e	`+ heal->bad = 0;`
		cb8e9e	`+ }`
		cb8e9e	`heal->done = 1;`
		cb8e9e	`}`
		cb8e9e
		cb8e9e	`@@ -1709,10 +1712,7 @@ ec_manager_heal_block (ec_fop_data_t *fop, int32_t state)`
		cb8e9e	`case EC_STATE_HEAL_DATA_UNLOCK:`
		cb8e9e	`ec_heal_inodelk(heal, F_UNLCK, 1, 0, 0);`
		cb8e9e
		cb8e9e	`- if (state < 0)`
		cb8e9e	`- return -EC_STATE_REPORT;`
		cb8e9e	`- else`
		cb8e9e	`- return EC_STATE_REPORT;`
		cb8e9e	`+ return EC_STATE_REPORT;`
		cb8e9e
		cb8e9e	`case EC_STATE_REPORT:`
		cb8e9e	`if (fop->cbks.heal) {`
		cb8e9e	`@@ -1728,7 +1728,7 @@ ec_manager_heal_block (ec_fop_data_t *fop, int32_t state)`
		cb8e9e	`EIO, 0, 0, 0, NULL);`
		cb8e9e	`}`
		cb8e9e
		cb8e9e	`- return -EC_STATE_END;`
		cb8e9e	`+ return EC_STATE_END;`
		cb8e9e	`default:`
		cb8e9e	`gf_msg (fop->xl->name, GF_LOG_ERROR, 0,`
		cb8e9e	`EC_MSG_UNHANDLED_STATE, "Unhandled state %d for %s",`
		cb8e9e	`@@ -1759,8 +1759,6 @@ ec_heal_block (call_frame_t frame, xlator_t this, uintptr_t target,`
		cb8e9e	`if (fop == NULL)`
		cb8e9e	`goto out;`
		cb8e9e
		cb8e9e	`- GF_ASSERT(ec_set_inode_size(fop, heal->fd->inode, heal->total_size));`
		cb8e9e	`-`
		cb8e9e	`error = 0;`
		cb8e9e
		cb8e9e	`out:`
		cb8e9e	`@@ -1834,6 +1832,8 @@ ec_rebuild_data (call_frame_t frame, ec_t ec, fd_t *fd, uint64_t size,`
		cb8e9e	`break;`
		cb8e9e
		cb8e9e	`}`
		cb8e9e	`+ memset (healed_sinks, 0, ec->nodes);`
		cb8e9e	`+ ec_mask_to_char_array (heal->bad, healed_sinks, ec->nodes);`
		cb8e9e	`fd_unref (heal->fd);`
		cb8e9e	`LOCK_DESTROY (&heal->lock);`
		cb8e9e	`syncbarrier_destroy (heal->data);`
		cb8e9e	`diff --git a/xlators/cluster/ec/src/ec.c b/xlators/cluster/ec/src/ec.c`
		cb8e9e	`index e28f402..4673c11 100644`
		cb8e9e	`--- a/xlators/cluster/ec/src/ec.c`
		cb8e9e	`+++ b/xlators/cluster/ec/src/ec.c`
		cb8e9e	`@@ -1303,7 +1303,7 @@ struct volume_options options[] =`
		cb8e9e	`.type = GF_OPTION_TYPE_INT,`
		cb8e9e	`.min = 0,/Disabling background heals/`
		cb8e9e	`.max = 256,`
		cb8e9e	`- .default_value = "8",`
		cb8e9e	`+ .default_value = "0",`
		cb8e9e	`.description = "This option can be used to control number of parallel"`
		cb8e9e	`" heals",`
		cb8e9e	`},`
		cb8e9e	`--`
		cb8e9e	`1.7.1`
		cb8e9e

rpms / glusterfs

Source Code

Blame SOURCES/0236-cluster-ec-Prevent-data-corruptions.patch