|
|
cb8e9e |
From 52341e3c2b836e5b9815a546fa5364ab8c091364 Mon Sep 17 00:00:00 2001
|
|
|
cb8e9e |
From: Raghavendra G <rgowdapp@redhat.com>
|
|
|
cb8e9e |
Date: Wed, 13 May 2015 19:56:47 +0530
|
|
|
cb8e9e |
Subject: [PATCH 12/18] cluster/dht: Don't rely on linkto xattr to find
|
|
|
cb8e9e |
destination subvol during phase 2 of migration.
|
|
|
cb8e9e |
|
|
|
cb8e9e |
linkto xattr on source file cannot be relied upon to find where the data
|
|
|
cb8e9e |
file currently resides. This can happen if there are multiple
|
|
|
cb8e9e |
migrations before phase 2 detection by a client. For example:
|
|
|
cb8e9e |
|
|
|
cb8e9e |
* migration (M1, node1, node2) starts.
|
|
|
cb8e9e |
* application writes some data. DHT correctly stores the state in
|
|
|
cb8e9e |
inode context that phase-1 of migration is in progress
|
|
|
cb8e9e |
* migration M1 completes
|
|
|
cb8e9e |
* migration (M2, node2, node3) is triggered and completed
|
|
|
cb8e9e |
* application resumes writes to the file. DHT identifies it as phase-2
|
|
|
cb8e9e |
of migration. However, linkto xattr on node1 points to node2, but
|
|
|
cb8e9e |
the file is on node3. A lookup correctly identifies node3 as cached
|
|
|
cb8e9e |
subvol
|
|
|
cb8e9e |
|
|
|
cb8e9e |
TBD:
|
|
|
cb8e9e |
When we identify phase-2 of a previous migration (say M1), there
|
|
|
cb8e9e |
might be a migration in progress - say (M3, node3, node4). In this
|
|
|
cb8e9e |
case we need to send writes to both (node3, node4) not just
|
|
|
cb8e9e |
node3. Also, the inode state needs to correctly indicate that it's in
|
|
|
cb8e9e |
phase-1 of migration. I'll send this as a different patch.
|
|
|
cb8e9e |
|
|
|
cb8e9e |
Change-Id: I1a861f766258170af2f6c0935468edb6be687b95
|
|
|
cb8e9e |
BUG: 1140506
|
|
|
cb8e9e |
Signed-off-by: Raghavendra G <rgowdapp@redhat.com>
|
|
|
cb8e9e |
Reviewed-on: http://review.gluster.org/10805
|
|
|
cb8e9e |
Reviewed-on: http://review.gluster.org/10965
|
|
|
cb8e9e |
Reviewed-on: https://code.engineering.redhat.com/gerrit/50188
|
|
|
cb8e9e |
Reviewed-by: Nithya Balachandran <nbalacha@redhat.com>
|
|
|
cb8e9e |
---
|
|
|
cb8e9e |
xlators/cluster/dht/src/dht-helper.c | 132 ++++++++---------------------------
|
|
|
cb8e9e |
1 file changed, 31 insertions(+), 101 deletions(-)
|
|
|
cb8e9e |
|
|
|
cb8e9e |
diff --git a/xlators/cluster/dht/src/dht-helper.c b/xlators/cluster/dht/src/dht-helper.c
|
|
|
cb8e9e |
index 3127171..1b5ad41 100644
|
|
|
cb8e9e |
--- a/xlators/cluster/dht/src/dht-helper.c
|
|
|
cb8e9e |
+++ b/xlators/cluster/dht/src/dht-helper.c
|
|
|
cb8e9e |
@@ -835,10 +835,9 @@ dht_migration_complete_check_task (void *data)
|
|
|
cb8e9e |
{
|
|
|
cb8e9e |
int ret = -1;
|
|
|
cb8e9e |
xlator_t *src_node = NULL;
|
|
|
cb8e9e |
- xlator_t *dst_node = NULL;
|
|
|
cb8e9e |
+ xlator_t *dst_node = NULL, *linkto_target = NULL;
|
|
|
cb8e9e |
dht_local_t *local = NULL;
|
|
|
cb8e9e |
dict_t *dict = NULL;
|
|
|
cb8e9e |
- dht_layout_t *layout = NULL;
|
|
|
cb8e9e |
struct iatt stbuf = {0,};
|
|
|
cb8e9e |
xlator_t *this = NULL;
|
|
|
cb8e9e |
call_frame_t *frame = NULL;
|
|
|
cb8e9e |
@@ -890,120 +889,50 @@ dht_migration_complete_check_task (void *data)
|
|
|
cb8e9e |
}
|
|
|
cb8e9e |
|
|
|
cb8e9e |
if (!ret)
|
|
|
cb8e9e |
- dst_node = dht_linkfile_subvol (this, NULL, NULL, dict);
|
|
|
cb8e9e |
-
|
|
|
cb8e9e |
- if (ret) {
|
|
|
cb8e9e |
- if (!dht_inode_missing(-ret) || (!local->loc.inode)) {
|
|
|
cb8e9e |
- local->op_errno = -ret;
|
|
|
cb8e9e |
- gf_log (this->name, GF_LOG_ERROR,
|
|
|
cb8e9e |
- "%s: failed to get the 'linkto' xattr %s",
|
|
|
cb8e9e |
- local->loc.path, strerror (-ret));
|
|
|
cb8e9e |
- ret = -1;
|
|
|
cb8e9e |
- goto out;
|
|
|
cb8e9e |
- }
|
|
|
cb8e9e |
-
|
|
|
cb8e9e |
- /* Need to do lookup on hashed subvol, then get the file */
|
|
|
cb8e9e |
- ret = syncop_lookup (this, &local->loc, &stbuf, NULL,
|
|
|
cb8e9e |
- NULL, NULL);
|
|
|
cb8e9e |
- if (ret) {
|
|
|
cb8e9e |
- local->op_errno = -ret;
|
|
|
cb8e9e |
- ret = -1;
|
|
|
cb8e9e |
- goto out;
|
|
|
cb8e9e |
- }
|
|
|
cb8e9e |
-
|
|
|
cb8e9e |
- dst_node = dht_subvol_get_cached (this, local->loc.inode);
|
|
|
cb8e9e |
- }
|
|
|
cb8e9e |
-
|
|
|
cb8e9e |
- if (!dst_node) {
|
|
|
cb8e9e |
- gf_log (this->name, GF_LOG_ERROR,
|
|
|
cb8e9e |
- "%s: failed to get the destination node",
|
|
|
cb8e9e |
- local->loc.path);
|
|
|
cb8e9e |
- ret = -1;
|
|
|
cb8e9e |
- local->op_errno = EINVAL;
|
|
|
cb8e9e |
- goto out;
|
|
|
cb8e9e |
- }
|
|
|
cb8e9e |
+ linkto_target = dht_linkfile_subvol (this, NULL, NULL, dict);
|
|
|
cb8e9e |
|
|
|
cb8e9e |
- /* lookup on dst */
|
|
|
cb8e9e |
if (local->loc.inode) {
|
|
|
cb8e9e |
- ret = syncop_lookup (dst_node, &local->loc, &stbuf, NULL,
|
|
|
cb8e9e |
- NULL, NULL);
|
|
|
cb8e9e |
-
|
|
|
cb8e9e |
- if (ret) {
|
|
|
cb8e9e |
- gf_log (this->name, GF_LOG_ERROR,
|
|
|
cb8e9e |
- "%s: failed to lookup the file on %s",
|
|
|
cb8e9e |
- local->loc.path, dst_node->name);
|
|
|
cb8e9e |
- local->op_errno = -ret;
|
|
|
cb8e9e |
- ret = -1;
|
|
|
cb8e9e |
- goto out;
|
|
|
cb8e9e |
- }
|
|
|
cb8e9e |
-
|
|
|
cb8e9e |
- if (gf_uuid_compare (stbuf.ia_gfid, local->loc.inode->gfid)) {
|
|
|
cb8e9e |
- gf_msg (this->name, GF_LOG_ERROR, 0,
|
|
|
cb8e9e |
- DHT_MSG_GFID_MISMATCH,
|
|
|
cb8e9e |
- "%s: gfid different on the target file on %s",
|
|
|
cb8e9e |
- local->loc.path, dst_node->name);
|
|
|
cb8e9e |
- ret = -1;
|
|
|
cb8e9e |
- local->op_errno = EIO;
|
|
|
cb8e9e |
- goto out;
|
|
|
cb8e9e |
- }
|
|
|
cb8e9e |
+ loc_copy (&tmp_loc, &local->loc);
|
|
|
cb8e9e |
} else {
|
|
|
cb8e9e |
- tmp_loc.inode = inode;
|
|
|
cb8e9e |
+ tmp_loc.inode = inode_ref (inode);
|
|
|
cb8e9e |
gf_uuid_copy (tmp_loc.gfid, inode->gfid);
|
|
|
cb8e9e |
- ret = syncop_lookup (dst_node, &tmp_loc, &stbuf, 0, 0, 0);
|
|
|
cb8e9e |
- if (ret) {
|
|
|
cb8e9e |
- gf_log (this->name, GF_LOG_ERROR,
|
|
|
cb8e9e |
- "%s: failed to lookup the file on %s",
|
|
|
cb8e9e |
- tmp_loc.path, dst_node->name);
|
|
|
cb8e9e |
- local->op_errno = -ret;
|
|
|
cb8e9e |
- ret = -1;
|
|
|
cb8e9e |
- goto out;
|
|
|
cb8e9e |
- }
|
|
|
cb8e9e |
-
|
|
|
cb8e9e |
- if (gf_uuid_compare (stbuf.ia_gfid, tmp_loc.inode->gfid)) {
|
|
|
cb8e9e |
- gf_msg (this->name, GF_LOG_ERROR, 0,
|
|
|
cb8e9e |
- DHT_MSG_GFID_MISMATCH,
|
|
|
cb8e9e |
- "%s: gfid different on the target file on %s",
|
|
|
cb8e9e |
- tmp_loc.path, dst_node->name);
|
|
|
cb8e9e |
- ret = -1;
|
|
|
cb8e9e |
- local->op_errno = EIO;
|
|
|
cb8e9e |
- goto out;
|
|
|
cb8e9e |
- }
|
|
|
cb8e9e |
}
|
|
|
cb8e9e |
|
|
|
cb8e9e |
- /* update inode ctx (the layout) */
|
|
|
cb8e9e |
- dht_layout_unref (this, local->layout);
|
|
|
cb8e9e |
-
|
|
|
cb8e9e |
- ret = dht_layout_preset (this, dst_node, inode);
|
|
|
cb8e9e |
- if (ret != 0) {
|
|
|
cb8e9e |
- gf_msg_debug (this->name, 0,
|
|
|
cb8e9e |
- "%s: could not set preset layout "
|
|
|
cb8e9e |
- "for subvol %s", local->loc.path,
|
|
|
cb8e9e |
- dst_node->name);
|
|
|
cb8e9e |
- ret = -1;
|
|
|
cb8e9e |
- local->op_errno = EINVAL;
|
|
|
cb8e9e |
+ ret = syncop_lookup (this, &tmp_loc, &stbuf, 0, 0, 0);
|
|
|
cb8e9e |
+ if (ret) {
|
|
|
cb8e9e |
+ gf_log (this->name, GF_LOG_ERROR,
|
|
|
cb8e9e |
+ "%s: failed to lookup the file on %s (%s)",
|
|
|
cb8e9e |
+ tmp_loc.path, this->name, strerror (-ret));
|
|
|
cb8e9e |
+ local->op_errno = -ret;
|
|
|
cb8e9e |
+ ret = -1;
|
|
|
cb8e9e |
goto out;
|
|
|
cb8e9e |
}
|
|
|
cb8e9e |
|
|
|
cb8e9e |
- layout = dht_layout_for_subvol (this, dst_node);
|
|
|
cb8e9e |
- if (!layout) {
|
|
|
cb8e9e |
- gf_log (this->name, GF_LOG_INFO,
|
|
|
cb8e9e |
- "%s: no pre-set layout for subvolume %s",
|
|
|
cb8e9e |
- local->loc.path, dst_node ? dst_node->name : "<nil>");
|
|
|
cb8e9e |
+ if (gf_uuid_compare (stbuf.ia_gfid, tmp_loc.inode->gfid)) {
|
|
|
cb8e9e |
+ gf_msg (this->name, GF_LOG_ERROR, 0,
|
|
|
cb8e9e |
+ DHT_MSG_GFID_MISMATCH,
|
|
|
cb8e9e |
+ "%s: gfid different on the target file on %s",
|
|
|
cb8e9e |
+ tmp_loc.path, dst_node->name);
|
|
|
cb8e9e |
ret = -1;
|
|
|
cb8e9e |
- local->op_errno = EINVAL;
|
|
|
cb8e9e |
+ local->op_errno = EIO;
|
|
|
cb8e9e |
goto out;
|
|
|
cb8e9e |
}
|
|
|
cb8e9e |
|
|
|
cb8e9e |
- ret = dht_layout_set (this, inode, layout);
|
|
|
cb8e9e |
- if (ret) {
|
|
|
cb8e9e |
- gf_log (this->name, GF_LOG_ERROR,
|
|
|
cb8e9e |
- "%s: failed to set the new layout",
|
|
|
cb8e9e |
- local->loc.path);
|
|
|
cb8e9e |
- local->op_errno = EINVAL;
|
|
|
cb8e9e |
- goto out;
|
|
|
cb8e9e |
+ dst_node = dht_subvol_get_cached (this, tmp_loc.inode);
|
|
|
cb8e9e |
+ if (linkto_target && dst_node != linkto_target) {
|
|
|
cb8e9e |
+ gf_log (this->name, GF_LOG_WARNING, "linkto target (%s) is "
|
|
|
cb8e9e |
+ "different from cached-subvol (%s). Treating %s as "
|
|
|
cb8e9e |
+ "destination subvol", linkto_target->name,
|
|
|
cb8e9e |
+ dst_node->name, dst_node->name);
|
|
|
cb8e9e |
}
|
|
|
cb8e9e |
|
|
|
cb8e9e |
+ /* update local. A layout is set in inode-ctx in lookup already */
|
|
|
cb8e9e |
+
|
|
|
cb8e9e |
+ dht_layout_unref (this, local->layout);
|
|
|
cb8e9e |
+
|
|
|
cb8e9e |
+ local->layout = dht_layout_get (frame->this, inode);
|
|
|
cb8e9e |
local->cached_subvol = dst_node;
|
|
|
cb8e9e |
+
|
|
|
cb8e9e |
ret = 0;
|
|
|
cb8e9e |
|
|
|
cb8e9e |
/* once we detect the migration complete, the inode-ctx2 is no more
|
|
|
cb8e9e |
@@ -1046,7 +975,6 @@ dht_migration_complete_check_task (void *data)
|
|
|
cb8e9e |
ret = -1;
|
|
|
cb8e9e |
}
|
|
|
cb8e9e |
}
|
|
|
cb8e9e |
- GF_FREE (path);
|
|
|
cb8e9e |
|
|
|
cb8e9e |
SYNCTASK_SETID (frame->root->uid, frame->root->gid);
|
|
|
cb8e9e |
|
|
|
cb8e9e |
@@ -1057,6 +985,8 @@ dht_migration_complete_check_task (void *data)
|
|
|
cb8e9e |
ret = 0;
|
|
|
cb8e9e |
out:
|
|
|
cb8e9e |
|
|
|
cb8e9e |
+ loc_wipe (&tmp_loc);
|
|
|
cb8e9e |
+
|
|
|
cb8e9e |
return ret;
|
|
|
cb8e9e |
}
|
|
|
cb8e9e |
|
|
|
cb8e9e |
--
|
|
|
cb8e9e |
1.9.3
|
|
|
cb8e9e |
|