7f4c2a
From 52341e3c2b836e5b9815a546fa5364ab8c091364 Mon Sep 17 00:00:00 2001
7f4c2a
From: Raghavendra G <rgowdapp@redhat.com>
7f4c2a
Date: Wed, 13 May 2015 19:56:47 +0530
7f4c2a
Subject: [PATCH 12/18] cluster/dht: Don't rely on linkto xattr to find
7f4c2a
 destination subvol during phase 2 of migration.
7f4c2a
7f4c2a
linkto xattr on source file cannot be relied upon to find where the data
7f4c2a
file currently resides. This can happen if there are multiple
7f4c2a
migrations before phase 2 detection by a client. For example,
7f4c2a
7f4c2a
* migration (M1, node1, node2) starts.
7f4c2a
* application writes some data. DHT correctly stores the state in
7f4c2a
  inode context that phase-1 of migration is in progress
7f4c2a
* migration M1 completes
7f4c2a
* migration (M2, node2, node3) is triggered and completed
7f4c2a
* application resumes writes to the file. DHT identifies it as phase-2
7f4c2a
  of migration. However, linkto xattr on node1 points to node2, but
7f4c2a
  the file is on node3. A lookup correctly identifies node3 as cached
7f4c2a
  subvol
7f4c2a
7f4c2a
TBD:
7f4c2a
   When we identify phase-2 of a previous migration (say M1), there
7f4c2a
   might be a migration in progress - say (M3, node3, node4). In this
7f4c2a
   case we need to send writes to both (node3, node4) not just
7f4c2a
   node3. Also, the inode state needs to correctly indicate that it's in
7f4c2a
   phase-1 of migration. I'll send this as a different patch.
7f4c2a
7f4c2a
Change-Id: I1a861f766258170af2f6c0935468edb6be687b95
7f4c2a
BUG: 1140506
7f4c2a
Signed-off-by: Raghavendra G <rgowdapp@redhat.com>
7f4c2a
Reviewed-on: http://review.gluster.org/10805
7f4c2a
Reviewed-on: http://review.gluster.org/10965
7f4c2a
Reviewed-on: https://code.engineering.redhat.com/gerrit/50188
7f4c2a
Reviewed-by: Nithya Balachandran <nbalacha@redhat.com>
7f4c2a
---
7f4c2a
 xlators/cluster/dht/src/dht-helper.c | 132 ++++++++---------------------------
7f4c2a
 1 file changed, 31 insertions(+), 101 deletions(-)
7f4c2a
7f4c2a
diff --git a/xlators/cluster/dht/src/dht-helper.c b/xlators/cluster/dht/src/dht-helper.c
7f4c2a
index 3127171..1b5ad41 100644
7f4c2a
--- a/xlators/cluster/dht/src/dht-helper.c
7f4c2a
+++ b/xlators/cluster/dht/src/dht-helper.c
7f4c2a
@@ -835,10 +835,9 @@ dht_migration_complete_check_task (void *data)
7f4c2a
 {
7f4c2a
         int           ret      = -1;
7f4c2a
         xlator_t     *src_node = NULL;
7f4c2a
-        xlator_t     *dst_node = NULL;
7f4c2a
+        xlator_t     *dst_node = NULL,  *linkto_target = NULL;
7f4c2a
         dht_local_t  *local    = NULL;
7f4c2a
         dict_t       *dict     = NULL;
7f4c2a
-        dht_layout_t *layout   = NULL;
7f4c2a
         struct iatt   stbuf    = {0,};
7f4c2a
         xlator_t     *this     = NULL;
7f4c2a
         call_frame_t *frame    = NULL;
7f4c2a
@@ -890,120 +889,50 @@ dht_migration_complete_check_task (void *data)
7f4c2a
         }
7f4c2a
 
7f4c2a
         if (!ret)
7f4c2a
-                dst_node = dht_linkfile_subvol (this, NULL, NULL, dict);
7f4c2a
-
7f4c2a
-        if (ret) {
7f4c2a
-                if (!dht_inode_missing(-ret) || (!local->loc.inode)) {
7f4c2a
-                        local->op_errno = -ret;
7f4c2a
-                        gf_log (this->name, GF_LOG_ERROR,
7f4c2a
-                                "%s: failed to get the 'linkto' xattr %s",
7f4c2a
-                                local->loc.path, strerror (-ret));
7f4c2a
-                        ret = -1;
7f4c2a
-                        goto out;
7f4c2a
-                }
7f4c2a
-
7f4c2a
-                /* Need to do lookup on hashed subvol, then get the file */
7f4c2a
-                ret = syncop_lookup (this, &local->loc, &stbuf, NULL,
7f4c2a
-                                     NULL, NULL);
7f4c2a
-                if (ret) {
7f4c2a
-                        local->op_errno = -ret;
7f4c2a
-                        ret = -1;
7f4c2a
-                        goto out;
7f4c2a
-                }
7f4c2a
-
7f4c2a
-                dst_node = dht_subvol_get_cached (this, local->loc.inode);
7f4c2a
-        }
7f4c2a
-
7f4c2a
-        if (!dst_node) {
7f4c2a
-                gf_log (this->name, GF_LOG_ERROR,
7f4c2a
-                        "%s: failed to get the destination node",
7f4c2a
-                        local->loc.path);
7f4c2a
-                ret = -1;
7f4c2a
-                local->op_errno = EINVAL;
7f4c2a
-                goto out;
7f4c2a
-        }
7f4c2a
+                linkto_target = dht_linkfile_subvol (this, NULL, NULL, dict);
7f4c2a
 
7f4c2a
-        /* lookup on dst */
7f4c2a
         if (local->loc.inode) {
7f4c2a
-                ret = syncop_lookup (dst_node, &local->loc, &stbuf, NULL,
7f4c2a
-                                     NULL, NULL);
7f4c2a
-
7f4c2a
-                if (ret) {
7f4c2a
-                        gf_log (this->name, GF_LOG_ERROR,
7f4c2a
-                                "%s: failed to lookup the file on %s",
7f4c2a
-                                local->loc.path, dst_node->name);
7f4c2a
-                        local->op_errno = -ret;
7f4c2a
-                        ret = -1;
7f4c2a
-                        goto out;
7f4c2a
-                }
7f4c2a
-
7f4c2a
-                if (gf_uuid_compare (stbuf.ia_gfid, local->loc.inode->gfid)) {
7f4c2a
-                        gf_msg (this->name, GF_LOG_ERROR, 0,
7f4c2a
-                                DHT_MSG_GFID_MISMATCH,
7f4c2a
-                                "%s: gfid different on the target file on %s",
7f4c2a
-                                local->loc.path, dst_node->name);
7f4c2a
-                        ret = -1;
7f4c2a
-                        local->op_errno = EIO;
7f4c2a
-                        goto out;
7f4c2a
-                }
7f4c2a
+                loc_copy (&tmp_loc, &local->loc);
7f4c2a
         } else {
7f4c2a
-                tmp_loc.inode = inode;
7f4c2a
+                tmp_loc.inode = inode_ref (inode);
7f4c2a
                 gf_uuid_copy (tmp_loc.gfid, inode->gfid);
7f4c2a
-                ret = syncop_lookup (dst_node, &tmp_loc, &stbuf, 0, 0, 0);
7f4c2a
-                if (ret) {
7f4c2a
-                        gf_log (this->name, GF_LOG_ERROR,
7f4c2a
-                                "%s: failed to lookup the file on %s",
7f4c2a
-                                tmp_loc.path, dst_node->name);
7f4c2a
-                        local->op_errno = -ret;
7f4c2a
-                        ret = -1;
7f4c2a
-                        goto out;
7f4c2a
-                }
7f4c2a
-
7f4c2a
-                if (gf_uuid_compare (stbuf.ia_gfid, tmp_loc.inode->gfid)) {
7f4c2a
-                        gf_msg (this->name, GF_LOG_ERROR, 0,
7f4c2a
-                                DHT_MSG_GFID_MISMATCH,
7f4c2a
-                                "%s: gfid different on the target file on %s",
7f4c2a
-                                tmp_loc.path, dst_node->name);
7f4c2a
-                        ret = -1;
7f4c2a
-                        local->op_errno = EIO;
7f4c2a
-                        goto out;
7f4c2a
-                }
7f4c2a
         }
7f4c2a
 
7f4c2a
-        /* update inode ctx (the layout) */
7f4c2a
-        dht_layout_unref (this, local->layout);
7f4c2a
-
7f4c2a
-        ret = dht_layout_preset (this, dst_node, inode);
7f4c2a
-        if (ret != 0) {
7f4c2a
-                gf_msg_debug (this->name, 0,
7f4c2a
-                              "%s: could not set preset layout "
7f4c2a
-                              "for subvol %s", local->loc.path,
7f4c2a
-                              dst_node->name);
7f4c2a
-                ret   = -1;
7f4c2a
-                local->op_errno = EINVAL;
7f4c2a
+        ret = syncop_lookup (this, &tmp_loc, &stbuf, 0, 0, 0);
7f4c2a
+        if (ret) {
7f4c2a
+                gf_log (this->name, GF_LOG_ERROR,
7f4c2a
+                        "%s: failed to lookup the file on %s (%s)",
7f4c2a
+                        tmp_loc.path, this->name, strerror (-ret));
7f4c2a
+                local->op_errno = -ret;
7f4c2a
+                ret = -1;
7f4c2a
                 goto out;
7f4c2a
         }
7f4c2a
 
7f4c2a
-        layout = dht_layout_for_subvol (this, dst_node);
7f4c2a
-        if (!layout) {
7f4c2a
-                gf_log (this->name, GF_LOG_INFO,
7f4c2a
-                        "%s: no pre-set layout for subvolume %s",
7f4c2a
-                        local->loc.path, dst_node ? dst_node->name : "<nil>");
7f4c2a
+        if (gf_uuid_compare (stbuf.ia_gfid, tmp_loc.inode->gfid)) {
7f4c2a
+                gf_msg (this->name, GF_LOG_ERROR, 0,
7f4c2a
+                        DHT_MSG_GFID_MISMATCH,
7f4c2a
+                        "%s: gfid different on the target file on %s",
7f4c2a
+                        tmp_loc.path, dst_node->name);
7f4c2a
                 ret = -1;
7f4c2a
-                local->op_errno = EINVAL;
7f4c2a
+                local->op_errno = EIO;
7f4c2a
                 goto out;
7f4c2a
         }
7f4c2a
 
7f4c2a
-        ret = dht_layout_set (this, inode, layout);
7f4c2a
-        if (ret) {
7f4c2a
-                gf_log (this->name, GF_LOG_ERROR,
7f4c2a
-                        "%s: failed to set the new layout",
7f4c2a
-                        local->loc.path);
7f4c2a
-                local->op_errno = EINVAL;
7f4c2a
-                goto out;
7f4c2a
+        dst_node = dht_subvol_get_cached (this, tmp_loc.inode);
7f4c2a
+        if (linkto_target && dst_node != linkto_target) {
7f4c2a
+                gf_log (this->name, GF_LOG_WARNING, "linkto target (%s) is "
7f4c2a
+                        "different from cached-subvol (%s). Treating %s as "
7f4c2a
+                        "destination subvol", linkto_target->name,
7f4c2a
+                        dst_node->name, dst_node->name);
7f4c2a
         }
7f4c2a
 
7f4c2a
+        /* update local. A layout is set in inode-ctx in lookup already */
7f4c2a
+
7f4c2a
+        dht_layout_unref (this, local->layout);
7f4c2a
+
7f4c2a
+        local->layout   = dht_layout_get (frame->this, inode);
7f4c2a
         local->cached_subvol = dst_node;
7f4c2a
+
7f4c2a
         ret = 0;
7f4c2a
 
7f4c2a
         /* once we detect the migration complete, the inode-ctx2 is no more
7f4c2a
@@ -1046,7 +975,6 @@ dht_migration_complete_check_task (void *data)
7f4c2a
                         ret = -1;
7f4c2a
                 }
7f4c2a
         }
7f4c2a
-        GF_FREE (path);
7f4c2a
 
7f4c2a
         SYNCTASK_SETID (frame->root->uid, frame->root->gid);
7f4c2a
 
7f4c2a
@@ -1057,6 +985,8 @@ dht_migration_complete_check_task (void *data)
7f4c2a
         ret = 0;
7f4c2a
 out:
7f4c2a
 
7f4c2a
+        loc_wipe (&tmp_loc);
7f4c2a
+
7f4c2a
         return ret;
7f4c2a
 }
7f4c2a
 
7f4c2a
-- 
7f4c2a
1.9.3
7f4c2a