|
|
3604df |
From 4a6bdc7f785b8a030142d8036a13ae2f78984934 Mon Sep 17 00:00:00 2001
|
|
|
3604df |
From: Krutika Dhananjay <kdhananj@redhat.com>
|
|
|
3604df |
Date: Thu, 6 Apr 2017 18:10:41 +0530
|
|
|
3604df |
Subject: [PATCH 308/316] features/shard: Fix vm corruption upon fix-layout
|
|
|
3604df |
|
|
|
3604df |
Backport of: https://review.gluster.org/17010
|
|
|
3604df |
|
|
|
3604df |
shard's writev implementation, as part of identifying
|
|
|
3604df |
presence of participant shards that aren't in memory,
|
|
|
3604df |
first sends an MKNOD on these shards, and upon EEXIST error,
|
|
|
3604df |
looks up the shards before proceeding with the writes.
|
|
|
3604df |
|
|
|
3604df |
The VM corruption was caused when the following happened:
|
|
|
3604df |
1. DHT had n subvolumes initially.
|
|
|
3604df |
2. Upon add-brick + fix-layout, the layout of .shard changed
|
|
|
3604df |
although the existing shards under it were yet to be migrated
|
|
|
3604df |
to their new hashed subvolumes.
|
|
|
3604df |
3. During this time, there were writes on the VM falling in regions
|
|
|
3604df |
of the file whose corresponding shards already existed under
|
|
|
3604df |
.shard.
|
|
|
3604df |
4. Sharding xl sent MKNOD on these shards, now creating them in their
|
|
|
3604df |
new hashed subvolumes although there already exist shard blocks for
|
|
|
3604df |
this region with valid data.
|
|
|
3604df |
5. All subsequent writes were wound on these newly created copies.
|
|
|
3604df |
|
|
|
3604df |
The net outcome is that neither copy of the shard had the correct
|
|
|
3604df |
data. This caused the affected VMs to be unbootable.
|
|
|
3604df |
|
|
|
3604df |
FIX:
|
|
|
3604df |
For want of better alternatives in DHT, the fix changes shard fops to do
|
|
|
3604df |
a LOOKUP before the MKNOD and, upon EEXIST error, perform another lookup.
|
|
|
3604df |
|
|
|
3604df |
Change-Id: I4086e7a9c27c9325b3830f4274be87847283a9f2
|
|
|
3604df |
BUG: 1439753
|
|
|
3604df |
RCA'd-by: Raghavendra Gowdappa <rgowdapp@redhat.com>
|
|
|
3604df |
Reported-by: Mahdi Adnan <mahdi.adnan@outlook.com>
|
|
|
3604df |
Signed-off-by: Krutika Dhananjay <kdhananj@redhat.com>
|
|
|
3604df |
Reviewed-on: https://code.engineering.redhat.com/gerrit/105195
|
|
|
3604df |
Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
|
|
|
3604df |
---
|
|
|
3604df |
xlators/features/shard/src/shard.c | 154 +++++++++++++++++++++++--------------
|
|
|
3604df |
xlators/features/shard/src/shard.h | 1 +
|
|
|
3604df |
2 files changed, 96 insertions(+), 59 deletions(-)
|
|
|
3604df |
|
|
|
3604df |
diff --git a/xlators/features/shard/src/shard.c b/xlators/features/shard/src/shard.c
|
|
|
3604df |
index 9504f12..eabed90 100644
|
|
|
3604df |
--- a/xlators/features/shard/src/shard.c
|
|
|
3604df |
+++ b/xlators/features/shard/src/shard.c
|
|
|
3604df |
@@ -1693,11 +1693,30 @@ shard_common_lookup_shards_cbk (call_frame_t *frame, void *cookie,
|
|
|
3604df |
|
|
|
3604df |
if (op_ret < 0) {
|
|
|
3604df |
/* Ignore absence of shards in the backend in truncate fop. */
|
|
|
3604df |
- if (((local->fop == GF_FOP_TRUNCATE) ||
|
|
|
3604df |
- (local->fop == GF_FOP_FTRUNCATE) ||
|
|
|
3604df |
- (local->fop == GF_FOP_RENAME) ||
|
|
|
3604df |
- (local->fop == GF_FOP_UNLINK)) && (op_errno == ENOENT))
|
|
|
3604df |
- goto done;
|
|
|
3604df |
+ switch (local->fop) {
|
|
|
3604df |
+ case GF_FOP_TRUNCATE:
|
|
|
3604df |
+ case GF_FOP_FTRUNCATE:
|
|
|
3604df |
+ case GF_FOP_RENAME:
|
|
|
3604df |
+ case GF_FOP_UNLINK:
|
|
|
3604df |
+ if (op_errno == ENOENT)
|
|
|
3604df |
+ goto done;
|
|
|
3604df |
+ break;
|
|
|
3604df |
+ case GF_FOP_WRITE:
|
|
|
3604df |
+ case GF_FOP_READ:
|
|
|
3604df |
+ case GF_FOP_ZEROFILL:
|
|
|
3604df |
+ case GF_FOP_DISCARD:
|
|
|
3604df |
+ case GF_FOP_FALLOCATE:
|
|
|
3604df |
+ if ((!local->first_lookup_done) &&
|
|
|
3604df |
+ (op_errno == ENOENT)) {
|
|
|
3604df |
+ local->create_count++;
|
|
|
3604df |
+ goto done;
|
|
|
3604df |
+ }
|
|
|
3604df |
+ break;
|
|
|
3604df |
+ default:
|
|
|
3604df |
+ break;
|
|
|
3604df |
+ }
|
|
|
3604df |
+
|
|
|
3604df |
+ /* else */
|
|
|
3604df |
gf_msg (this->name, GF_LOG_ERROR, op_errno,
|
|
|
3604df |
SHARD_MSG_LOOKUP_SHARD_FAILED, "Lookup on shard %d "
|
|
|
3604df |
"failed. Base file gfid = %s", shard_block_num,
|
|
|
3604df |
@@ -1714,6 +1733,8 @@ shard_common_lookup_shards_cbk (call_frame_t *frame, void *cookie,
|
|
|
3604df |
done:
|
|
|
3604df |
call_count = shard_call_count_return (frame);
|
|
|
3604df |
if (call_count == 0) {
|
|
|
3604df |
+ if (!local->first_lookup_done)
|
|
|
3604df |
+ local->first_lookup_done = _gf_true;
|
|
|
3604df |
if (local->op_ret < 0)
|
|
|
3604df |
goto unwind;
|
|
|
3604df |
else
|
|
|
3604df |
@@ -3197,47 +3218,6 @@ next:
|
|
|
3604df |
}
|
|
|
3604df |
|
|
|
3604df |
int
|
|
|
3604df |
-shard_post_lookup_shards_readv_handler (call_frame_t *frame, xlator_t *this)
|
|
|
3604df |
-{
|
|
|
3604df |
- shard_local_t *local = NULL;
|
|
|
3604df |
-
|
|
|
3604df |
- local = frame->local;
|
|
|
3604df |
-
|
|
|
3604df |
- if (local->op_ret < 0) {
|
|
|
3604df |
- SHARD_STACK_UNWIND (readv, frame, local->op_ret,
|
|
|
3604df |
- local->op_errno, NULL, 0, NULL, NULL, NULL);
|
|
|
3604df |
- return 0;
|
|
|
3604df |
- }
|
|
|
3604df |
-
|
|
|
3604df |
- shard_readv_do (frame, this);
|
|
|
3604df |
-
|
|
|
3604df |
- return 0;
|
|
|
3604df |
-}
|
|
|
3604df |
-
|
|
|
3604df |
-int
|
|
|
3604df |
-shard_post_mknod_readv_handler (call_frame_t *frame, xlator_t *this)
|
|
|
3604df |
-{
|
|
|
3604df |
- shard_local_t *local = NULL;
|
|
|
3604df |
-
|
|
|
3604df |
- local = frame->local;
|
|
|
3604df |
-
|
|
|
3604df |
- if (local->op_ret < 0) {
|
|
|
3604df |
- SHARD_STACK_UNWIND (readv, frame, local->op_ret,
|
|
|
3604df |
- local->op_errno, NULL, 0, NULL, NULL, NULL);
|
|
|
3604df |
- return 0;
|
|
|
3604df |
- }
|
|
|
3604df |
-
|
|
|
3604df |
- if (!local->eexist_count) {
|
|
|
3604df |
- shard_readv_do (frame, this);
|
|
|
3604df |
- } else {
|
|
|
3604df |
- local->call_count = local->eexist_count;
|
|
|
3604df |
- shard_common_lookup_shards (frame, this, local->loc.inode,
|
|
|
3604df |
- shard_post_lookup_shards_readv_handler);
|
|
|
3604df |
- }
|
|
|
3604df |
- return 0;
|
|
|
3604df |
-}
|
|
|
3604df |
-
|
|
|
3604df |
-int
|
|
|
3604df |
shard_common_mknod_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
|
|
|
3604df |
int32_t op_ret, int32_t op_errno, inode_t *inode,
|
|
|
3604df |
struct iatt *buf, struct iatt *preparent,
|
|
|
3604df |
@@ -3267,6 +3247,7 @@ done:
|
|
|
3604df |
call_count = shard_call_count_return (frame);
|
|
|
3604df |
if (call_count == 0) {
|
|
|
3604df |
SHARD_UNSET_ROOT_FS_ID (frame, local);
|
|
|
3604df |
+ local->create_count = 0;
|
|
|
3604df |
local->post_mknod_handler (frame, this);
|
|
|
3604df |
}
|
|
|
3604df |
|
|
|
3604df |
@@ -3397,6 +3378,55 @@ err:
|
|
|
3604df |
}
|
|
|
3604df |
|
|
|
3604df |
int
|
|
|
3604df |
+shard_post_mknod_readv_handler (call_frame_t *frame, xlator_t *this);
|
|
|
3604df |
+
|
|
|
3604df |
+int
|
|
|
3604df |
+shard_post_lookup_shards_readv_handler (call_frame_t *frame, xlator_t *this)
|
|
|
3604df |
+{
|
|
|
3604df |
+ shard_local_t *local = NULL;
|
|
|
3604df |
+
|
|
|
3604df |
+ local = frame->local;
|
|
|
3604df |
+
|
|
|
3604df |
+ if (local->op_ret < 0) {
|
|
|
3604df |
+ SHARD_STACK_UNWIND (readv, frame, local->op_ret,
|
|
|
3604df |
+ local->op_errno, NULL, 0, NULL, NULL, NULL);
|
|
|
3604df |
+ return 0;
|
|
|
3604df |
+ }
|
|
|
3604df |
+
|
|
|
3604df |
+ if (local->create_count) {
|
|
|
3604df |
+ shard_common_resume_mknod (frame, this,
|
|
|
3604df |
+ shard_post_mknod_readv_handler);
|
|
|
3604df |
+ } else {
|
|
|
3604df |
+ shard_readv_do (frame, this);
|
|
|
3604df |
+ }
|
|
|
3604df |
+
|
|
|
3604df |
+ return 0;
|
|
|
3604df |
+}
|
|
|
3604df |
+
|
|
|
3604df |
+int
|
|
|
3604df |
+shard_post_mknod_readv_handler (call_frame_t *frame, xlator_t *this)
|
|
|
3604df |
+{
|
|
|
3604df |
+ shard_local_t *local = NULL;
|
|
|
3604df |
+
|
|
|
3604df |
+ local = frame->local;
|
|
|
3604df |
+
|
|
|
3604df |
+ if (local->op_ret < 0) {
|
|
|
3604df |
+ SHARD_STACK_UNWIND (readv, frame, local->op_ret,
|
|
|
3604df |
+ local->op_errno, NULL, 0, NULL, NULL, NULL);
|
|
|
3604df |
+ return 0;
|
|
|
3604df |
+ }
|
|
|
3604df |
+
|
|
|
3604df |
+ if (!local->eexist_count) {
|
|
|
3604df |
+ shard_readv_do (frame, this);
|
|
|
3604df |
+ } else {
|
|
|
3604df |
+ local->call_count = local->eexist_count;
|
|
|
3604df |
+ shard_common_lookup_shards (frame, this, local->loc.inode,
|
|
|
3604df |
+ shard_post_lookup_shards_readv_handler);
|
|
|
3604df |
+ }
|
|
|
3604df |
+ return 0;
|
|
|
3604df |
+}
|
|
|
3604df |
+
|
|
|
3604df |
+int
|
|
|
3604df |
shard_post_resolve_readv_handler (call_frame_t *frame, xlator_t *this)
|
|
|
3604df |
{
|
|
|
3604df |
shard_local_t *local = NULL;
|
|
|
3604df |
@@ -3422,9 +3452,9 @@ shard_post_resolve_readv_handler (call_frame_t *frame, xlator_t *this)
|
|
|
3604df |
}
|
|
|
3604df |
|
|
|
3604df |
if (local->call_count) {
|
|
|
3604df |
- local->create_count = local->call_count;
|
|
|
3604df |
- shard_common_resume_mknod (frame, this,
|
|
|
3604df |
- shard_post_mknod_readv_handler);
|
|
|
3604df |
+ shard_common_lookup_shards (frame, this,
|
|
|
3604df |
+ local->resolver_base_inode,
|
|
|
3604df |
+ shard_post_lookup_shards_readv_handler);
|
|
|
3604df |
} else {
|
|
|
3604df |
shard_readv_do (frame, this);
|
|
|
3604df |
}
|
|
|
3604df |
@@ -3575,14 +3605,11 @@ shard_readv (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
|
|
|
3604df |
|
|
|
3604df |
shard_lookup_base_file (frame, this, &local->loc,
|
|
|
3604df |
shard_post_lookup_readv_handler);
|
|
|
3604df |
-
|
|
|
3604df |
return 0;
|
|
|
3604df |
-
|
|
|
3604df |
err:
|
|
|
3604df |
SHARD_STACK_UNWIND (readv, frame, -1, ENOMEM, NULL, 0, NULL, NULL,
|
|
|
3604df |
NULL);
|
|
|
3604df |
return 0;
|
|
|
3604df |
-
|
|
|
3604df |
}
|
|
|
3604df |
|
|
|
3604df |
int
|
|
|
3604df |
@@ -3875,6 +3902,10 @@ next:
|
|
|
3604df |
}
|
|
|
3604df |
|
|
|
3604df |
int
|
|
|
3604df |
+shard_common_inode_write_post_mknod_handler (call_frame_t *frame,
|
|
|
3604df |
+ xlator_t *this);
|
|
|
3604df |
+
|
|
|
3604df |
+int
|
|
|
3604df |
shard_common_inode_write_post_lookup_shards_handler (call_frame_t *frame,
|
|
|
3604df |
xlator_t *this)
|
|
|
3604df |
{
|
|
|
3604df |
@@ -3889,7 +3920,12 @@ shard_common_inode_write_post_lookup_shards_handler (call_frame_t *frame,
|
|
|
3604df |
return 0;
|
|
|
3604df |
}
|
|
|
3604df |
|
|
|
3604df |
- shard_common_inode_write_do (frame, this);
|
|
|
3604df |
+ if (local->create_count) {
|
|
|
3604df |
+ shard_common_resume_mknod (frame, this,
|
|
|
3604df |
+ shard_common_inode_write_post_mknod_handler);
|
|
|
3604df |
+ } else {
|
|
|
3604df |
+ shard_common_inode_write_do (frame, this);
|
|
|
3604df |
+ }
|
|
|
3604df |
|
|
|
3604df |
return 0;
|
|
|
3604df |
}
|
|
|
3604df |
@@ -3937,11 +3973,13 @@ shard_common_inode_write_post_lookup_handler (call_frame_t *frame,
|
|
|
3604df |
|
|
|
3604df |
local->postbuf = local->prebuf;
|
|
|
3604df |
|
|
|
3604df |
- if (local->create_count)
|
|
|
3604df |
- shard_common_resume_mknod (frame, this,
|
|
|
3604df |
- shard_common_inode_write_post_mknod_handler);
|
|
|
3604df |
- else
|
|
|
3604df |
+ if (local->call_count) {
|
|
|
3604df |
+ shard_common_lookup_shards (frame, this,
|
|
|
3604df |
+ local->resolver_base_inode,
|
|
|
3604df |
+ shard_common_inode_write_post_lookup_shards_handler);
|
|
|
3604df |
+ } else {
|
|
|
3604df |
shard_common_inode_write_do (frame, this);
|
|
|
3604df |
+ }
|
|
|
3604df |
|
|
|
3604df |
return 0;
|
|
|
3604df |
}
|
|
|
3604df |
@@ -3961,8 +3999,6 @@ shard_common_inode_write_post_resolve_handler (call_frame_t *frame,
|
|
|
3604df |
return 0;
|
|
|
3604df |
}
|
|
|
3604df |
|
|
|
3604df |
- local->create_count = local->call_count;
|
|
|
3604df |
-
|
|
|
3604df |
shard_lookup_base_file (frame, this, &local->loc,
|
|
|
3604df |
shard_common_inode_write_post_lookup_handler);
|
|
|
3604df |
return 0;
|
|
|
3604df |
diff --git a/xlators/features/shard/src/shard.h b/xlators/features/shard/src/shard.h
|
|
|
3604df |
index 09232a4..7319598 100644
|
|
|
3604df |
--- a/xlators/features/shard/src/shard.h
|
|
|
3604df |
+++ b/xlators/features/shard/src/shard.h
|
|
|
3604df |
@@ -255,6 +255,7 @@ typedef struct shard_local {
|
|
|
3604df |
shard_lock_t *shard_lock;
|
|
|
3604df |
} lock;
|
|
|
3604df |
inode_t *resolver_base_inode;
|
|
|
3604df |
+ gf_boolean_t first_lookup_done;
|
|
|
3604df |
} shard_local_t;
|
|
|
3604df |
|
|
|
3604df |
typedef struct shard_inode_ctx {
|
|
|
3604df |
--
|
|
|
3604df |
1.8.3.1
|
|
|
3604df |
|