cb8e9e
From 52341e3c2b836e5b9815a546fa5364ab8c091364 Mon Sep 17 00:00:00 2001
cb8e9e
From: Raghavendra G <rgowdapp@redhat.com>
cb8e9e
Date: Wed, 13 May 2015 19:56:47 +0530
cb8e9e
Subject: [PATCH 12/18] cluster/dht: Don't rely on linkto xattr to find
cb8e9e
 destination subvol during phase 2 of migration.
cb8e9e
cb8e9e
linkto xattr on source file cannot be relied upon to find where the data
cb8e9e
file currently resides. This can happen if there are multiple
cb8e9e
migrations before phase 2 detection by a client. For example:
cb8e9e
cb8e9e
* migration (M1, node1, node2) starts.
cb8e9e
* application writes some data. DHT correctly stores the state in
cb8e9e
  inode context that phase-1 of migration is in progress
cb8e9e
* migration M1 completes
cb8e9e
* migration (M2, node2, node3) is triggered and completed
cb8e9e
* application resumes writes to the file. DHT identifies it as phase-2
cb8e9e
  of migration. However, linkto xattr on node1 points to node2, but
cb8e9e
  the file is on node3. A lookup correctly identifies node3 as cached
cb8e9e
  subvol
cb8e9e
cb8e9e
TBD:
cb8e9e
   When we identify phase-2 of a previous migration (say M1), there
cb8e9e
   might be a migration in progress - say (M3, node3, node4). In this
cb8e9e
   case we need to send writes to both (node3, node4) not just
cb8e9e
   node3. Also, the inode state needs to correctly indicate that it's in
cb8e9e
   phase-1 of migration. I'll send this as a different patch.
cb8e9e
cb8e9e
Change-Id: I1a861f766258170af2f6c0935468edb6be687b95
cb8e9e
BUG: 1140506
cb8e9e
Signed-off-by: Raghavendra G <rgowdapp@redhat.com>
cb8e9e
Reviewed-on: http://review.gluster.org/10805
cb8e9e
Reviewed-on: http://review.gluster.org/10965
cb8e9e
Reviewed-on: https://code.engineering.redhat.com/gerrit/50188
cb8e9e
Reviewed-by: Nithya Balachandran <nbalacha@redhat.com>
cb8e9e
---
cb8e9e
 xlators/cluster/dht/src/dht-helper.c | 132 ++++++++---------------------------
cb8e9e
 1 file changed, 31 insertions(+), 101 deletions(-)
cb8e9e
cb8e9e
diff --git a/xlators/cluster/dht/src/dht-helper.c b/xlators/cluster/dht/src/dht-helper.c
cb8e9e
index 3127171..1b5ad41 100644
cb8e9e
--- a/xlators/cluster/dht/src/dht-helper.c
cb8e9e
+++ b/xlators/cluster/dht/src/dht-helper.c
cb8e9e
@@ -835,10 +835,9 @@ dht_migration_complete_check_task (void *data)
cb8e9e
 {
cb8e9e
         int           ret      = -1;
cb8e9e
         xlator_t     *src_node = NULL;
cb8e9e
-        xlator_t     *dst_node = NULL;
cb8e9e
+        xlator_t     *dst_node = NULL,  *linkto_target = NULL;
cb8e9e
         dht_local_t  *local    = NULL;
cb8e9e
         dict_t       *dict     = NULL;
cb8e9e
-        dht_layout_t *layout   = NULL;
cb8e9e
         struct iatt   stbuf    = {0,};
cb8e9e
         xlator_t     *this     = NULL;
cb8e9e
         call_frame_t *frame    = NULL;
cb8e9e
@@ -890,120 +889,50 @@ dht_migration_complete_check_task (void *data)
cb8e9e
         }
cb8e9e
 
cb8e9e
         if (!ret)
cb8e9e
-                dst_node = dht_linkfile_subvol (this, NULL, NULL, dict);
cb8e9e
-
cb8e9e
-        if (ret) {
cb8e9e
-                if (!dht_inode_missing(-ret) || (!local->loc.inode)) {
cb8e9e
-                        local->op_errno = -ret;
cb8e9e
-                        gf_log (this->name, GF_LOG_ERROR,
cb8e9e
-                                "%s: failed to get the 'linkto' xattr %s",
cb8e9e
-                                local->loc.path, strerror (-ret));
cb8e9e
-                        ret = -1;
cb8e9e
-                        goto out;
cb8e9e
-                }
cb8e9e
-
cb8e9e
-                /* Need to do lookup on hashed subvol, then get the file */
cb8e9e
-                ret = syncop_lookup (this, &local->loc, &stbuf, NULL,
cb8e9e
-                                     NULL, NULL);
cb8e9e
-                if (ret) {
cb8e9e
-                        local->op_errno = -ret;
cb8e9e
-                        ret = -1;
cb8e9e
-                        goto out;
cb8e9e
-                }
cb8e9e
-
cb8e9e
-                dst_node = dht_subvol_get_cached (this, local->loc.inode);
cb8e9e
-        }
cb8e9e
-
cb8e9e
-        if (!dst_node) {
cb8e9e
-                gf_log (this->name, GF_LOG_ERROR,
cb8e9e
-                        "%s: failed to get the destination node",
cb8e9e
-                        local->loc.path);
cb8e9e
-                ret = -1;
cb8e9e
-                local->op_errno = EINVAL;
cb8e9e
-                goto out;
cb8e9e
-        }
cb8e9e
+                linkto_target = dht_linkfile_subvol (this, NULL, NULL, dict);
cb8e9e
 
cb8e9e
-        /* lookup on dst */
cb8e9e
         if (local->loc.inode) {
cb8e9e
-                ret = syncop_lookup (dst_node, &local->loc, &stbuf, NULL,
cb8e9e
-                                     NULL, NULL);
cb8e9e
-
cb8e9e
-                if (ret) {
cb8e9e
-                        gf_log (this->name, GF_LOG_ERROR,
cb8e9e
-                                "%s: failed to lookup the file on %s",
cb8e9e
-                                local->loc.path, dst_node->name);
cb8e9e
-                        local->op_errno = -ret;
cb8e9e
-                        ret = -1;
cb8e9e
-                        goto out;
cb8e9e
-                }
cb8e9e
-
cb8e9e
-                if (gf_uuid_compare (stbuf.ia_gfid, local->loc.inode->gfid)) {
cb8e9e
-                        gf_msg (this->name, GF_LOG_ERROR, 0,
cb8e9e
-                                DHT_MSG_GFID_MISMATCH,
cb8e9e
-                                "%s: gfid different on the target file on %s",
cb8e9e
-                                local->loc.path, dst_node->name);
cb8e9e
-                        ret = -1;
cb8e9e
-                        local->op_errno = EIO;
cb8e9e
-                        goto out;
cb8e9e
-                }
cb8e9e
+                loc_copy (&tmp_loc, &local->loc);
cb8e9e
         } else {
cb8e9e
-                tmp_loc.inode = inode;
cb8e9e
+                tmp_loc.inode = inode_ref (inode);
cb8e9e
                 gf_uuid_copy (tmp_loc.gfid, inode->gfid);
cb8e9e
-                ret = syncop_lookup (dst_node, &tmp_loc, &stbuf, 0, 0, 0);
cb8e9e
-                if (ret) {
cb8e9e
-                        gf_log (this->name, GF_LOG_ERROR,
cb8e9e
-                                "%s: failed to lookup the file on %s",
cb8e9e
-                                tmp_loc.path, dst_node->name);
cb8e9e
-                        local->op_errno = -ret;
cb8e9e
-                        ret = -1;
cb8e9e
-                        goto out;
cb8e9e
-                }
cb8e9e
-
cb8e9e
-                if (gf_uuid_compare (stbuf.ia_gfid, tmp_loc.inode->gfid)) {
cb8e9e
-                        gf_msg (this->name, GF_LOG_ERROR, 0,
cb8e9e
-                                DHT_MSG_GFID_MISMATCH,
cb8e9e
-                                "%s: gfid different on the target file on %s",
cb8e9e
-                                tmp_loc.path, dst_node->name);
cb8e9e
-                        ret = -1;
cb8e9e
-                        local->op_errno = EIO;
cb8e9e
-                        goto out;
cb8e9e
-                }
cb8e9e
         }
cb8e9e
 
cb8e9e
-        /* update inode ctx (the layout) */
cb8e9e
-        dht_layout_unref (this, local->layout);
cb8e9e
-
cb8e9e
-        ret = dht_layout_preset (this, dst_node, inode);
cb8e9e
-        if (ret != 0) {
cb8e9e
-                gf_msg_debug (this->name, 0,
cb8e9e
-                              "%s: could not set preset layout "
cb8e9e
-                              "for subvol %s", local->loc.path,
cb8e9e
-                              dst_node->name);
cb8e9e
-                ret   = -1;
cb8e9e
-                local->op_errno = EINVAL;
cb8e9e
+        ret = syncop_lookup (this, &tmp_loc, &stbuf, 0, 0, 0);
cb8e9e
+        if (ret) {
cb8e9e
+                gf_log (this->name, GF_LOG_ERROR,
cb8e9e
+                        "%s: failed to lookup the file on %s (%s)",
cb8e9e
+                        tmp_loc.path, this->name, strerror (-ret));
cb8e9e
+                local->op_errno = -ret;
cb8e9e
+                ret = -1;
cb8e9e
                 goto out;
cb8e9e
         }
cb8e9e
 
cb8e9e
-        layout = dht_layout_for_subvol (this, dst_node);
cb8e9e
-        if (!layout) {
cb8e9e
-                gf_log (this->name, GF_LOG_INFO,
cb8e9e
-                        "%s: no pre-set layout for subvolume %s",
cb8e9e
-                        local->loc.path, dst_node ? dst_node->name : "<nil>");
cb8e9e
+        if (gf_uuid_compare (stbuf.ia_gfid, tmp_loc.inode->gfid)) {
cb8e9e
+                gf_msg (this->name, GF_LOG_ERROR, 0,
cb8e9e
+                        DHT_MSG_GFID_MISMATCH,
cb8e9e
+                        "%s: gfid different on the target file on %s",
cb8e9e
+                        tmp_loc.path, dst_node->name);
cb8e9e
                 ret = -1;
cb8e9e
-                local->op_errno = EINVAL;
cb8e9e
+                local->op_errno = EIO;
cb8e9e
                 goto out;
cb8e9e
         }
cb8e9e
 
cb8e9e
-        ret = dht_layout_set (this, inode, layout);
cb8e9e
-        if (ret) {
cb8e9e
-                gf_log (this->name, GF_LOG_ERROR,
cb8e9e
-                        "%s: failed to set the new layout",
cb8e9e
-                        local->loc.path);
cb8e9e
-                local->op_errno = EINVAL;
cb8e9e
-                goto out;
cb8e9e
+        dst_node = dht_subvol_get_cached (this, tmp_loc.inode);
cb8e9e
+        if (linkto_target && dst_node != linkto_target) {
cb8e9e
+                gf_log (this->name, GF_LOG_WARNING, "linkto target (%s) is "
cb8e9e
+                        "different from cached-subvol (%s). Treating %s as "
cb8e9e
+                        "destination subvol", linkto_target->name,
cb8e9e
+                        dst_node->name, dst_node->name);
cb8e9e
         }
cb8e9e
 
cb8e9e
+        /* update local. A layout is set in inode-ctx in lookup already */
cb8e9e
+
cb8e9e
+        dht_layout_unref (this, local->layout);
cb8e9e
+
cb8e9e
+        local->layout   = dht_layout_get (frame->this, inode);
cb8e9e
         local->cached_subvol = dst_node;
cb8e9e
+
cb8e9e
         ret = 0;
cb8e9e
 
cb8e9e
         /* once we detect the migration complete, the inode-ctx2 is no more
cb8e9e
@@ -1046,7 +975,6 @@ dht_migration_complete_check_task (void *data)
cb8e9e
                         ret = -1;
cb8e9e
                 }
cb8e9e
         }
cb8e9e
-        GF_FREE (path);
cb8e9e
 
cb8e9e
         SYNCTASK_SETID (frame->root->uid, frame->root->gid);
cb8e9e
 
cb8e9e
@@ -1057,6 +985,8 @@ dht_migration_complete_check_task (void *data)
cb8e9e
         ret = 0;
cb8e9e
 out:
cb8e9e
 
cb8e9e
+        loc_wipe (&tmp_loc);
cb8e9e
+
cb8e9e
         return ret;
cb8e9e
 }
cb8e9e
 
cb8e9e
-- 
cb8e9e
1.9.3
cb8e9e