|
|
cb8e9e |
From a06d455f2f3ad30e7aab2a6ac9594281dd1386c7 Mon Sep 17 00:00:00 2001
|
|
|
cb8e9e |
From: Pranith Kumar K <pkarampu@redhat.com>
|
|
|
cb8e9e |
Date: Wed, 8 Jul 2015 17:52:11 +0530
|
|
|
cb8e9e |
Subject: [PATCH 236/244] cluster/ec: Prevent data corruptions
|
|
|
cb8e9e |
|
|
|
cb8e9e |
Backport of http://review.gluster.org/11580
|
|
|
cb8e9e |
Backport of http://review.gluster.com/11640
|
|
|
cb8e9e |
|
|
|
cb8e9e |
- Don't read from bricks that are healing
|
|
|
cb8e9e |
- On lock reuse preserve 'healing' bits
|
|
|
cb8e9e |
- Don't set ctx->size outside locks in healing code
|
|
|
cb8e9e |
- Allow xattrop internal fops also on the fop->mask.
|
|
|
cb8e9e |
|
|
|
cb8e9e |
Change-Id: I35503039e4723cf7f33d6797f0ba90dd0aca130b
|
|
|
cb8e9e |
BUG: 1230513
|
|
|
cb8e9e |
Signed-off-by: Pranith Kumar K <pkarampu@redhat.com>
|
|
|
cb8e9e |
Reviewed-on: https://code.engineering.redhat.com/gerrit/52663
|
|
|
cb8e9e |
---
|
|
|
cb8e9e |
xlators/cluster/ec/src/ec-common.c | 25 +++++++++++++++++++++----
|
|
|
cb8e9e |
xlators/cluster/ec/src/ec-data.h | 1 +
|
|
|
cb8e9e |
xlators/cluster/ec/src/ec-heal.c | 22 +++++++++++-----------
|
|
|
cb8e9e |
xlators/cluster/ec/src/ec.c | 2 +-
|
|
|
cb8e9e |
4 files changed, 34 insertions(+), 16 deletions(-)
|
|
|
cb8e9e |
|
|
|
cb8e9e |
diff --git a/xlators/cluster/ec/src/ec-common.c b/xlators/cluster/ec/src/ec-common.c
|
|
|
cb8e9e |
index 439773e..b2d0348 100644
|
|
|
cb8e9e |
--- a/xlators/cluster/ec/src/ec-common.c
|
|
|
cb8e9e |
+++ b/xlators/cluster/ec/src/ec-common.c
|
|
|
cb8e9e |
@@ -401,6 +401,18 @@ ec_must_wind (ec_fop_data_t *fop)
|
|
|
cb8e9e |
return _gf_false;
|
|
|
cb8e9e |
}
|
|
|
cb8e9e |
|
|
|
cb8e9e |
+static gf_boolean_t
|
|
|
cb8e9e |
+ec_internal_op (ec_fop_data_t *fop)
|
|
|
cb8e9e |
+{
|
|
|
cb8e9e |
+ if (ec_must_wind (fop))
|
|
|
cb8e9e |
+ return _gf_true;
|
|
|
cb8e9e |
+ if (fop->id == GF_FOP_XATTROP)
|
|
|
cb8e9e |
+ return _gf_true;
|
|
|
cb8e9e |
+ if (fop->id == GF_FOP_FXATTROP)
|
|
|
cb8e9e |
+ return _gf_true;
|
|
|
cb8e9e |
+ return _gf_false;
|
|
|
cb8e9e |
+}
|
|
|
cb8e9e |
+
|
|
|
cb8e9e |
int32_t ec_child_select(ec_fop_data_t * fop)
|
|
|
cb8e9e |
{
|
|
|
cb8e9e |
ec_t * ec = fop->xl->private;
|
|
|
cb8e9e |
@@ -413,8 +425,9 @@ int32_t ec_child_select(ec_fop_data_t * fop)
|
|
|
cb8e9e |
/* Wind the fop on same subvols as parent for any internal extra fops like
|
|
|
cb8e9e |
* head/tail read in case of writev fop. Unlocks shouldn't do this because
|
|
|
cb8e9e |
* unlock should go on all subvols where lock is performed*/
|
|
|
cb8e9e |
- if (fop->parent && !ec_must_wind (fop))
|
|
|
cb8e9e |
- fop->mask &= fop->parent->mask;
|
|
|
cb8e9e |
+ if (fop->parent && !ec_internal_op (fop)) {
|
|
|
cb8e9e |
+ fop->mask &= (fop->parent->mask & ~fop->parent->healing);
|
|
|
cb8e9e |
+ }
|
|
|
cb8e9e |
|
|
|
cb8e9e |
mask = ec->xl_up;
|
|
|
cb8e9e |
if (fop->parent == NULL)
|
|
|
cb8e9e |
@@ -972,6 +985,7 @@ out:
|
|
|
cb8e9e |
parent->mask &= fop->good;
|
|
|
cb8e9e |
|
|
|
cb8e9e |
/*As of now only data healing marks bricks as healing*/
|
|
|
cb8e9e |
+ lock->healing |= fop->healing;
|
|
|
cb8e9e |
if (ec_is_data_fop (parent->id)) {
|
|
|
cb8e9e |
parent->healing |= fop->healing;
|
|
|
cb8e9e |
}
|
|
|
cb8e9e |
@@ -996,9 +1010,13 @@ void ec_get_size_version(ec_lock_link_t *link)
|
|
|
cb8e9e |
|
|
|
cb8e9e |
lock = link->lock;
|
|
|
cb8e9e |
ctx = lock->ctx;
|
|
|
cb8e9e |
+ fop = link->fop;
|
|
|
cb8e9e |
|
|
|
cb8e9e |
/* If ec metadata has already been retrieved, do not try again. */
|
|
|
cb8e9e |
if (ctx->have_info) {
|
|
|
cb8e9e |
+ if (ec_is_data_fop (fop->id)) {
|
|
|
cb8e9e |
+ fop->healing |= lock->healing;
|
|
|
cb8e9e |
+ }
|
|
|
cb8e9e |
return;
|
|
|
cb8e9e |
}
|
|
|
cb8e9e |
|
|
|
cb8e9e |
@@ -1008,8 +1026,6 @@ void ec_get_size_version(ec_lock_link_t *link)
|
|
|
cb8e9e |
return;
|
|
|
cb8e9e |
}
|
|
|
cb8e9e |
|
|
|
cb8e9e |
- fop = link->fop;
|
|
|
cb8e9e |
-
|
|
|
cb8e9e |
uid = fop->frame->root->uid;
|
|
|
cb8e9e |
gid = fop->frame->root->gid;
|
|
|
cb8e9e |
|
|
|
cb8e9e |
@@ -1274,6 +1290,7 @@ int32_t ec_locked(call_frame_t *frame, void *cookie, xlator_t *this,
|
|
|
cb8e9e |
link = fop->data;
|
|
|
cb8e9e |
lock = link->lock;
|
|
|
cb8e9e |
lock->mask = lock->good_mask = fop->good;
|
|
|
cb8e9e |
+ lock->healing = 0;
|
|
|
cb8e9e |
|
|
|
cb8e9e |
ec_lock_acquired(link);
|
|
|
cb8e9e |
ec_lock(fop->parent);
|
|
|
cb8e9e |
diff --git a/xlators/cluster/ec/src/ec-data.h b/xlators/cluster/ec/src/ec-data.h
|
|
|
cb8e9e |
index 135ccdf..ec470e9 100644
|
|
|
cb8e9e |
--- a/xlators/cluster/ec/src/ec-data.h
|
|
|
cb8e9e |
+++ b/xlators/cluster/ec/src/ec-data.h
|
|
|
cb8e9e |
@@ -146,6 +146,7 @@ struct _ec_lock
|
|
|
cb8e9e |
the next unlock/lock cycle. */
|
|
|
cb8e9e |
uintptr_t mask;
|
|
|
cb8e9e |
uintptr_t good_mask;
|
|
|
cb8e9e |
+ uintptr_t healing;
|
|
|
cb8e9e |
int32_t refs;
|
|
|
cb8e9e |
int32_t refs_frozen;
|
|
|
cb8e9e |
int32_t inserted;
|
|
|
cb8e9e |
diff --git a/xlators/cluster/ec/src/ec-heal.c b/xlators/cluster/ec/src/ec-heal.c
|
|
|
cb8e9e |
index 6f82203..a7c97a5 100644
|
|
|
cb8e9e |
--- a/xlators/cluster/ec/src/ec-heal.c
|
|
|
cb8e9e |
+++ b/xlators/cluster/ec/src/ec-heal.c
|
|
|
cb8e9e |
@@ -256,10 +256,13 @@ int32_t ec_heal_readv_cbk(call_frame_t * frame, void * cookie, xlator_t * this,
|
|
|
cb8e9e |
}
|
|
|
cb8e9e |
else
|
|
|
cb8e9e |
{
|
|
|
cb8e9e |
- gf_msg_debug (fop->xl->name, 0, "%s: read failed %s, failing "
|
|
|
cb8e9e |
- "to heal block at %"PRIu64,
|
|
|
cb8e9e |
- uuid_utoa (heal->fd->inode->gfid), strerror (op_errno),
|
|
|
cb8e9e |
- heal->offset);
|
|
|
cb8e9e |
+ if (op_ret < 0) {
|
|
|
cb8e9e |
+ gf_msg_debug (fop->xl->name, 0, "%s: read failed %s, failing "
|
|
|
cb8e9e |
+ "to heal block at %"PRIu64,
|
|
|
cb8e9e |
+ uuid_utoa (heal->fd->inode->gfid), strerror (op_errno),
|
|
|
cb8e9e |
+ heal->offset);
|
|
|
cb8e9e |
+ heal->bad = 0;
|
|
|
cb8e9e |
+ }
|
|
|
cb8e9e |
heal->done = 1;
|
|
|
cb8e9e |
}
|
|
|
cb8e9e |
|
|
|
cb8e9e |
@@ -1709,10 +1712,7 @@ ec_manager_heal_block (ec_fop_data_t *fop, int32_t state)
|
|
|
cb8e9e |
case EC_STATE_HEAL_DATA_UNLOCK:
|
|
|
cb8e9e |
ec_heal_inodelk(heal, F_UNLCK, 1, 0, 0);
|
|
|
cb8e9e |
|
|
|
cb8e9e |
- if (state < 0)
|
|
|
cb8e9e |
- return -EC_STATE_REPORT;
|
|
|
cb8e9e |
- else
|
|
|
cb8e9e |
- return EC_STATE_REPORT;
|
|
|
cb8e9e |
+ return EC_STATE_REPORT;
|
|
|
cb8e9e |
|
|
|
cb8e9e |
case EC_STATE_REPORT:
|
|
|
cb8e9e |
if (fop->cbks.heal) {
|
|
|
cb8e9e |
@@ -1728,7 +1728,7 @@ ec_manager_heal_block (ec_fop_data_t *fop, int32_t state)
|
|
|
cb8e9e |
EIO, 0, 0, 0, NULL);
|
|
|
cb8e9e |
}
|
|
|
cb8e9e |
|
|
|
cb8e9e |
- return -EC_STATE_END;
|
|
|
cb8e9e |
+ return EC_STATE_END;
|
|
|
cb8e9e |
default:
|
|
|
cb8e9e |
gf_msg (fop->xl->name, GF_LOG_ERROR, 0,
|
|
|
cb8e9e |
EC_MSG_UNHANDLED_STATE, "Unhandled state %d for %s",
|
|
|
cb8e9e |
@@ -1759,8 +1759,6 @@ ec_heal_block (call_frame_t *frame, xlator_t *this, uintptr_t target,
|
|
|
cb8e9e |
if (fop == NULL)
|
|
|
cb8e9e |
goto out;
|
|
|
cb8e9e |
|
|
|
cb8e9e |
- GF_ASSERT(ec_set_inode_size(fop, heal->fd->inode, heal->total_size));
|
|
|
cb8e9e |
-
|
|
|
cb8e9e |
error = 0;
|
|
|
cb8e9e |
|
|
|
cb8e9e |
out:
|
|
|
cb8e9e |
@@ -1834,6 +1832,8 @@ ec_rebuild_data (call_frame_t *frame, ec_t *ec, fd_t *fd, uint64_t size,
|
|
|
cb8e9e |
break;
|
|
|
cb8e9e |
|
|
|
cb8e9e |
}
|
|
|
cb8e9e |
+ memset (healed_sinks, 0, ec->nodes);
|
|
|
cb8e9e |
+ ec_mask_to_char_array (heal->bad, healed_sinks, ec->nodes);
|
|
|
cb8e9e |
fd_unref (heal->fd);
|
|
|
cb8e9e |
LOCK_DESTROY (&heal->lock);
|
|
|
cb8e9e |
syncbarrier_destroy (heal->data);
|
|
|
cb8e9e |
diff --git a/xlators/cluster/ec/src/ec.c b/xlators/cluster/ec/src/ec.c
|
|
|
cb8e9e |
index e28f402..4673c11 100644
|
|
|
cb8e9e |
--- a/xlators/cluster/ec/src/ec.c
|
|
|
cb8e9e |
+++ b/xlators/cluster/ec/src/ec.c
|
|
|
cb8e9e |
@@ -1303,7 +1303,7 @@ struct volume_options options[] =
|
|
|
cb8e9e |
.type = GF_OPTION_TYPE_INT,
|
|
|
cb8e9e |
.min = 0,/*Disabling background heals*/
|
|
|
cb8e9e |
.max = 256,
|
|
|
cb8e9e |
- .default_value = "8",
|
|
|
cb8e9e |
+ .default_value = "0",
|
|
|
cb8e9e |
.description = "This option can be used to control number of parallel"
|
|
|
cb8e9e |
" heals",
|
|
|
cb8e9e |
},
|
|
|
cb8e9e |
--
|
|
|
cb8e9e |
1.7.1
|
|
|
cb8e9e |
|