d1681e
From 4f5197f585ce4117e29e6b6af0e6d91c19eb34ea Mon Sep 17 00:00:00 2001
d1681e
From: N Balachandran <nbalacha@redhat.com>
d1681e
Date: Wed, 3 Jan 2018 10:36:58 +0530
d1681e
Subject: [PATCH 142/148] cluster/dht: Add migration checks to dht_(f)xattrop
d1681e
d1681e
The dht_(f)xattrop implementation did not perform the
d1681e
migration phase1/phase2 checks, and their absence could cause
d1681e
issues with rebalance on sharded volumes.
d1681e
This does not solve the issue where fops may reach the target
d1681e
out of order.
d1681e
d1681e
upstream : https://review.gluster.org/#/c/17776
d1681e
d1681e
> Change-Id: I2416fc35115e60659e35b4b717fd51f20746586c
d1681e
> BUG: 1471031
d1681e
> Signed-off-by: N Balachandran <nbalacha@redhat.com>
d1681e
d1681e
Change-Id: I95b453421809c543ba8e4febd9a12c84e9439a29
d1681e
BUG: 1530146
d1681e
Signed-off-by: N Balachandran <nbalacha@redhat.com>
d1681e
Reviewed-on: https://code.engineering.redhat.com/gerrit/126959
d1681e
Tested-by: RHGS Build Bot <nigelb@redhat.com>
d1681e
Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
d1681e
---
d1681e
 libglusterfs/src/glusterfs.h              |   1 +
d1681e
 xlators/cluster/dht/src/dht-common.c      |  48 +++++-
d1681e
 xlators/cluster/dht/src/dht-common.h      |  10 ++
d1681e
 xlators/cluster/dht/src/dht-helper.c      |   3 +
d1681e
 xlators/cluster/dht/src/dht-inode-read.c  | 241 +++++++++++++++++++++++++++---
d1681e
 xlators/cluster/dht/src/dht-rebalance.c   |  86 +++++------
d1681e
 xlators/cluster/dht/src/dht-selfheal.c    |   1 -
d1681e
 xlators/storage/posix/src/posix-helpers.c |  31 ++++
d1681e
 xlators/storage/posix/src/posix.c         |   2 +
d1681e
 xlators/storage/posix/src/posix.h         |   4 +
d1681e
 10 files changed, 366 insertions(+), 61 deletions(-)
d1681e
d1681e
diff --git a/libglusterfs/src/glusterfs.h b/libglusterfs/src/glusterfs.h
d1681e
index 18256aa..c8835d9 100644
d1681e
--- a/libglusterfs/src/glusterfs.h
d1681e
+++ b/libglusterfs/src/glusterfs.h
d1681e
@@ -272,6 +272,7 @@
d1681e
 #define TIER_LINKFILE_GFID           "tier-linkfile-gfid"
d1681e
 #define DHT_SKIP_OPEN_FD_UNLINK     "dont-unlink-for-open-fd"
d1681e
 #define DHT_IATT_IN_XDATA_KEY       "dht-get-iatt-in-xattr"
d1681e
+#define DHT_MODE_IN_XDATA_KEY       "dht-get-mode-in-xattr"
d1681e
 #define GET_LINK_COUNT              "get-link-count"
d1681e
 #define GF_GET_SIZE                 "get-size"
d1681e
 
d1681e
diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c
d1681e
index b55cb36..c2d0827 100644
d1681e
--- a/xlators/cluster/dht/src/dht-common.c
d1681e
+++ b/xlators/cluster/dht/src/dht-common.c
d1681e
@@ -18,7 +18,6 @@
d1681e
 #include "dht-lock.h"
d1681e
 #include "defaults.h"
d1681e
 #include "byte-order.h"
d1681e
-#include "glusterfs-acl.h"
d1681e
 #include "quota-common-utils.h"
d1681e
 #include "upcall-utils.h"
d1681e
 
d1681e
@@ -46,6 +45,11 @@ int
d1681e
 dht_rmdir_readdirp_do (call_frame_t *readdirp_frame, xlator_t *this);
d1681e
 
d1681e
 
d1681e
+int
d1681e
+dht_common_xattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
d1681e
+                        int32_t op_ret, int32_t op_errno, dict_t *dict,
d1681e
+                        dict_t *xdata);
d1681e
+
d1681e
 
d1681e
 /* Sets the blocks and size values to fixed values. This is to be called
d1681e
  * only for dirs. The caller is responsible for checking the type
d1681e
@@ -61,6 +65,48 @@ int32_t dht_set_fixed_dir_stat (struct iatt *stat)
d1681e
 }
d1681e
 
d1681e
 
d1681e
+/* Ask the subvolume to return the file mode in the reply xdata by
d1681e
+ * setting both DHT_MODE_IN_XDATA_KEY and DHT_IATT_IN_XDATA_KEY in
d1681e
+ * xattr_req. Returns 0 if at least one key was set, -1 otherwise.
d1681e
+ */
d1681e
+int dht_request_iatt_in_xdata (xlator_t *this, dict_t *xattr_req)
d1681e
+{
d1681e
+        int mode_ret = -1;
d1681e
+        int iatt_ret = -1;
d1681e
+
d1681e
+        mode_ret = dict_set_int8 (xattr_req, DHT_MODE_IN_XDATA_KEY, 1);
d1681e
+        iatt_ret = dict_set_int8 (xattr_req, DHT_IATT_IN_XDATA_KEY, 1);
d1681e
+        /* Fail only when neither key could be set */
d1681e
+        return (mode_ret && iatt_ret) ? -1 : 0;
d1681e
+}
d1681e
+
d1681e
+
d1681e
+/* Fill stbuf from the reply xdata. DHT_MODE_IN_XDATA_KEY is preferred and
d1681e
+ * sets only ia_prot and ia_type; otherwise the iatt stored under
d1681e
+ * DHT_IATT_IN_XDATA_KEY is copied. Returns 0 on success.
d1681e
+ */
d1681e
+int dht_read_iatt_from_xdata (xlator_t *this, dict_t *xdata,
d1681e
+                              struct iatt *stbuf)
d1681e
+{
d1681e
+        int          ret      = -1;
d1681e
+        int32_t      mode     = 0;
d1681e
+        struct iatt *in_stbuf = NULL;
d1681e
+
d1681e
+        ret = dict_get_int32 (xdata, DHT_MODE_IN_XDATA_KEY, &mode);
d1681e
+        if (ret) {
d1681e
+                /* Copy the iatt out: &stbuf is only the local pointer */
d1681e
+                ret = dict_get_bin (xdata, DHT_IATT_IN_XDATA_KEY,
d1681e
+                                    (void **)&in_stbuf);
d1681e
+                if (!ret && in_stbuf)
d1681e
+                        *stbuf = *in_stbuf;
d1681e
+        } else {
d1681e
+                stbuf->ia_prot = ia_prot_from_st_mode (mode);
d1681e
+                stbuf->ia_type = ia_type_from_st_mode (mode);
d1681e
+        }
d1681e
+
d1681e
+        return ret;
d1681e
+}
d1681e
+
d1681e
 int
d1681e
 dht_rmdir_unlock (call_frame_t *frame, xlator_t *this);
d1681e
 
d1681e
diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h
d1681e
index e2afd6c..47a2e23 100644
d1681e
--- a/xlators/cluster/dht/src/dht-common.h
d1681e
+++ b/xlators/cluster/dht/src/dht-common.h
d1681e
@@ -20,6 +20,7 @@
d1681e
 #include "refcount.h"
d1681e
 #include "timer.h"
d1681e
 #include "protocol-common.h"
d1681e
+#include "glusterfs-acl.h"
d1681e
 
d1681e
 #ifndef _DHT_H
d1681e
 #define _DHT_H
d1681e
@@ -146,6 +147,7 @@ struct dht_rebalance_ {
d1681e
         dht_defrag_cbk_fn_t  target_op_fn;
d1681e
         dict_t              *xdata;
d1681e
         dict_t              *xattr;
d1681e
+        dict_t              *dict;
d1681e
         int32_t              set;
d1681e
         struct gf_flock      flock;
d1681e
         int                  lock_cmd;
d1681e
@@ -1416,4 +1418,12 @@ dht_file_removexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
d1681e
 int
d1681e
 dht_file_setxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
d1681e
                        int op_ret, int op_errno, dict_t *xdata);
d1681e
+
d1681e
+/* Abstract out the DHT-IATT-IN-DICT */
d1681e
+
d1681e
+
d1681e
+int dht_request_iatt_in_xdata (xlator_t *this, dict_t *xattr_req);
d1681e
+
d1681e
+int dht_read_iatt_from_xdata (xlator_t *this, dict_t *xdata,
d1681e
+                              struct iatt *stbuf);
d1681e
 #endif/* _DHT_H */
d1681e
diff --git a/xlators/cluster/dht/src/dht-helper.c b/xlators/cluster/dht/src/dht-helper.c
d1681e
index cca2bfe..e56a085 100644
d1681e
--- a/xlators/cluster/dht/src/dht-helper.c
d1681e
+++ b/xlators/cluster/dht/src/dht-helper.c
d1681e
@@ -797,6 +797,9 @@ dht_local_wipe (xlator_t *this, dht_local_t *local)
d1681e
         if (local->rebalance.xattr)
d1681e
                 dict_unref (local->rebalance.xattr);
d1681e
 
d1681e
+        if (local->rebalance.dict)
d1681e
+                dict_unref (local->rebalance.dict);
d1681e
+
d1681e
         GF_FREE (local->rebalance.vector);
d1681e
 
d1681e
         if (local->rebalance.iobref)
d1681e
diff --git a/xlators/cluster/dht/src/dht-inode-read.c b/xlators/cluster/dht/src/dht-inode-read.c
d1681e
index a9e4766..fa63fef 100644
d1681e
--- a/xlators/cluster/dht/src/dht-inode-read.c
d1681e
+++ b/xlators/cluster/dht/src/dht-inode-read.c
d1681e
@@ -24,8 +24,9 @@ int dht_lk2 (xlator_t *this, xlator_t *dst_node,
d1681e
              call_frame_t *frame, int ret);
d1681e
 int dht_fsync2 (xlator_t *this, xlator_t *dst_node,
d1681e
                 call_frame_t *frame, int ret);
d1681e
-
d1681e
-
d1681e
+int
d1681e
+dht_common_xattrop2 (xlator_t *this, xlator_t *subvol, call_frame_t *frame,
d1681e
+                     int ret);
d1681e
 
d1681e
 int
d1681e
 dht_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
d1681e
@@ -1246,13 +1247,163 @@ err:
d1681e
         return 0;
d1681e
 }
d1681e
 
d1681e
-/* Currently no translators on top of 'distribute' will be using
d1681e
- * below fops, hence not implementing 'migration' related checks
d1681e
- */
d1681e
+/* Callback for file (f)xattrop: re-winds to the migration target if needed */
d1681e
+int
d1681e
+dht_common_xattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
d1681e
+                        int32_t op_ret, int32_t op_errno, dict_t *dict,
d1681e
+                        dict_t *xdata)
d1681e
+{
d1681e
+        dht_local_t  *local          = NULL;
d1681e
+        call_frame_t *call_frame     = NULL;
d1681e
+        xlator_t     *prev           = NULL;
d1681e
+        xlator_t     *src_subvol     = NULL;
d1681e
+        xlator_t     *dst_subvol     = NULL;
d1681e
+        struct iatt   stbuf          = {0,};
d1681e
+        int           ret            = -1;
d1681e
+        inode_t      *inode          = NULL;
d1681e
+
d1681e
+        local = frame->local;
d1681e
+        call_frame = cookie;
d1681e
+        prev = call_frame->this;
d1681e
+
d1681e
+        local->op_errno = op_errno;
d1681e
+
d1681e
+        if ((op_ret == -1) && !dht_inode_missing (op_errno)) {
d1681e
+                gf_msg_debug (this->name, op_errno,
d1681e
+                              "subvolume %s returned -1.",
d1681e
+                              prev->name);
d1681e
+                goto out;
d1681e
+        }
d1681e
+        /* Only the first attempt (call_cnt == 1) runs the checks below */
d1681e
+        if (local->call_cnt != 1)
d1681e
+                goto out;
d1681e
+
d1681e
+        ret = dht_read_iatt_from_xdata (this, xdata, &stbuf);
d1681e
+
d1681e
+        if ((!op_ret) && (ret)) {
d1681e
+                /* The fop succeeded but the reply carried neither the
d1681e
+                 * mode nor the iatt, so the migration phase is unknown.
d1681e
+                 * Unwind as is; this can cause corruption with sharding.
d1681e
+                 */
d1681e
+                goto out;
d1681e
+        }
d1681e
+        /* Saved so that dht_common_xattrop2 can unwind with this reply */
d1681e
+        local->op_ret = op_ret;
d1681e
+        local->rebalance.target_op_fn = dht_common_xattrop2;
d1681e
+        if (xdata)
d1681e
+                local->rebalance.xdata = dict_ref (xdata);
d1681e
+
d1681e
+        if (dict)
d1681e
+                local->rebalance.dict = dict_ref (dict);
d1681e
+
d1681e
+        /* Phase 2 of migration, or a dht_inode_missing () failure */
d1681e
+        if ((op_ret == -1) || IS_DHT_MIGRATION_PHASE2 (&stbuf)) {
d1681e
+                ret = dht_rebalance_complete_check (this, frame);
d1681e
+                if (!ret)
d1681e
+                        return 0;
d1681e
+        }
d1681e
+
d1681e
+        /* Phase 1 of migration */
d1681e
+        if (IS_DHT_MIGRATION_PHASE1 (&stbuf)) {
d1681e
+                /* Use the inode from loc if set, else the one from fd */
d1681e
+                inode = local->loc.inode ? local->loc.inode : local->fd->inode;
d1681e
+                dht_inode_ctx_get_mig_info (this, inode, &src_subvol,
d1681e
+                                            &dst_subvol);
d1681e
+
d1681e
+                if (dht_mig_info_is_invalid (local->cached_subvol, src_subvol,
d1681e
+                                             dst_subvol) ||
d1681e
+                      !dht_fd_open_on_dst (this, local->fd, dst_subvol)) {
d1681e
+
d1681e
+                        ret = dht_rebalance_in_progress_check (this, frame);
d1681e
+                        if (!ret)
d1681e
+                                return 0;
d1681e
+                } else {
d1681e
+                        dht_common_xattrop2 (this, dst_subvol, frame, 0);
d1681e
+                        return 0;
d1681e
+                }
d1681e
+        }
d1681e
+
d1681e
+        /* Reached when no retry was started: unwind with this reply */
d1681e
+out:
d1681e
+        if (local->fop == GF_FOP_XATTROP) {
d1681e
+                DHT_STACK_UNWIND (xattrop, frame, op_ret, op_errno,
d1681e
+                                  dict, xdata);
d1681e
+        } else {
d1681e
+                DHT_STACK_UNWIND (fxattrop, frame, op_ret, op_errno,
d1681e
+                                  dict, xdata);
d1681e
+        }
d1681e
+
d1681e
+        return 0;
d1681e
+}
d1681e
+/* Second attempt of (f)xattrop, wound to subvol after the migration
d1681e
+ * checks; unwinds the saved first reply if we are not migrating. */
d1681e
+int
d1681e
+dht_common_xattrop2 (xlator_t *this, xlator_t *subvol, call_frame_t *frame,
d1681e
+                     int ret)
d1681e
+{
d1681e
+        dht_local_t *local    = NULL;
d1681e
+        int32_t      op_errno = EINVAL;
d1681e
+
d1681e
+        if ((frame == NULL) || (frame->local == NULL))
d1681e
+                goto out;
d1681e
+
d1681e
+        local = frame->local;
d1681e
+        op_errno = local->op_errno;
d1681e
+
d1681e
+        if (we_are_not_migrating (ret)) {
d1681e
+                /* This dht xlator is not migrating the file. Unwind with
d1681e
+                 * the reply saved in local (op_ret, dict and xdata,
d1681e
+                 * which carries the original mode bits) so the higher
d1681e
+                 * DHT layer can handle this.
d1681e
+                 */
d1681e
+                if (local->fop == GF_FOP_XATTROP) {
d1681e
+                        DHT_STACK_UNWIND (xattrop, frame, local->op_ret,
d1681e
+                                          op_errno, local->rebalance.dict,
d1681e
+                                          local->rebalance.xdata);
d1681e
+                } else {
d1681e
+                        DHT_STACK_UNWIND (fxattrop, frame, local->op_ret,
d1681e
+                                          op_errno, local->rebalance.dict,
d1681e
+                                          local->rebalance.xdata);
d1681e
+                }
d1681e
+
d1681e
+                return 0;
d1681e
+        }
d1681e
+        /* Without a target subvolume there is nowhere to retry */
d1681e
+        if (subvol == NULL)
d1681e
+                goto out;
d1681e
+
d1681e
+        local->call_cnt = 2; /* This is the second attempt */
d1681e
+
d1681e
+        if (local->fop == GF_FOP_XATTROP) {
d1681e
+                STACK_WIND (frame, dht_common_xattrop_cbk, subvol,
d1681e
+                            subvol->fops->xattrop, &local->loc,
d1681e
+                            local->rebalance.flags, local->rebalance.xattr,
d1681e
+                            local->xattr_req);
d1681e
+        } else {
d1681e
+                STACK_WIND (frame, dht_common_xattrop_cbk, subvol,
d1681e
+                            subvol->fops->fxattrop, local->fd,
d1681e
+                            local->rebalance.flags, local->rebalance.xattr,
d1681e
+                            local->xattr_req);
d1681e
+        }
d1681e
+
d1681e
+        return 0;
d1681e
+
d1681e
+out:
d1681e
+
d1681e
+        /* If local is unavailable the fop is unknown and fxattrop is
d1681e
+         * assumed, so we could be unwinding the wrong function here */
d1681e
+
d1681e
+        if (local && (local->fop == GF_FOP_XATTROP)) {
d1681e
+                DHT_STACK_UNWIND (xattrop, frame, -1, op_errno, NULL, NULL);
d1681e
+        } else {
d1681e
+                DHT_STACK_UNWIND (fxattrop, frame, -1, op_errno, NULL, NULL);
d1681e
+        }
d1681e
+        return 0;
d1681e
+}
d1681e
+
d1681e
 
d1681e
 int
d1681e
 dht_xattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
d1681e
-                 int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata)
d1681e
+                  int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata)
d1681e
 {
d1681e
         DHT_STACK_UNWIND (xattrop, frame, op_ret, op_errno, dict, xdata);
d1681e
         return 0;
d1681e
@@ -1263,9 +1414,10 @@ int
d1681e
 dht_xattrop (call_frame_t *frame, xlator_t *this, loc_t *loc,
d1681e
              gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata)
d1681e
 {
d1681e
-        xlator_t     *subvol = NULL;
d1681e
+        xlator_t     *subvol   = NULL;
d1681e
         int           op_errno = -1;
d1681e
-        dht_local_t  *local = NULL;
d1681e
+        dht_local_t  *local    = NULL;
d1681e
+        int           ret      = -1;
d1681e
 
d1681e
         VALIDATE_OR_GOTO (frame, err);
d1681e
         VALIDATE_OR_GOTO (this, err);
d1681e
@@ -1287,11 +1439,33 @@ dht_xattrop (call_frame_t *frame, xlator_t *this, loc_t *loc,
d1681e
                 goto err;
d1681e
         }
d1681e
 
d1681e
-        local->call_cnt = 1;
d1681e
+        /* Todo : Handle dirs as well. At the moment the only xlator above dht
d1681e
+         * that uses xattrop is sharding and that is only for files */
d1681e
+
d1681e
+        if (IA_ISDIR (loc->inode->ia_type)) {
d1681e
+                STACK_WIND (frame, dht_xattrop_cbk, subvol,
d1681e
+                            subvol->fops->xattrop, loc, flags, dict, xdata);
d1681e
+
d1681e
+        } else {
d1681e
+                local->xattr_req = xdata ? dict_ref(xdata) : dict_new ();
d1681e
+                local->call_cnt = 1;
d1681e
 
d1681e
-        STACK_WIND (frame, dht_xattrop_cbk,
d1681e
-                    subvol, subvol->fops->xattrop,
d1681e
-                    loc, flags, dict, xdata);
d1681e
+                local->rebalance.xattr = dict_ref (dict);
d1681e
+                local->rebalance.flags = flags;
d1681e
+
d1681e
+                ret = dht_request_iatt_in_xdata (this, local->xattr_req);
d1681e
+
d1681e
+                if (ret) {
d1681e
+                        gf_msg_debug (this->name, 0,
d1681e
+                                      "Failed to set dictionary key %s file=%s",
d1681e
+                                      DHT_IATT_IN_XDATA_KEY, loc->path);
d1681e
+                }
d1681e
+
d1681e
+                STACK_WIND (frame, dht_common_xattrop_cbk, subvol,
d1681e
+                            subvol->fops->xattrop, loc,
d1681e
+                            local->rebalance.flags, local->rebalance.xattr,
d1681e
+                            local->xattr_req);
d1681e
+        }
d1681e
 
d1681e
         return 0;
d1681e
 
d1681e
@@ -1318,6 +1492,8 @@ dht_fxattrop (call_frame_t *frame, xlator_t *this,
d1681e
 {
d1681e
         xlator_t     *subvol = NULL;
d1681e
         int           op_errno = -1;
d1681e
+        dht_local_t  *local    = NULL;
d1681e
+        int           ret      = -1;
d1681e
 
d1681e
         VALIDATE_OR_GOTO (frame, err);
d1681e
         VALIDATE_OR_GOTO (this, err);
d1681e
@@ -1331,10 +1507,39 @@ dht_fxattrop (call_frame_t *frame, xlator_t *this,
d1681e
                 goto err;
d1681e
         }
d1681e
 
d1681e
-        STACK_WIND (frame,
d1681e
-                    dht_fxattrop_cbk,
d1681e
-                    subvol, subvol->fops->fxattrop,
d1681e
-                    fd, flags, dict, xdata);
d1681e
+        local = dht_local_init (frame, NULL, fd, GF_FOP_FXATTROP);
d1681e
+        if (!local) {
d1681e
+                op_errno = ENOMEM;
d1681e
+                goto err;
d1681e
+        }
d1681e
+
d1681e
+        /* Todo : Handle dirs as well. At the moment the only xlator above dht
d1681e
+         * that uses xattrop is sharding and that is only for files */
d1681e
+
d1681e
+        if (IA_ISDIR (fd->inode->ia_type)) {
d1681e
+                STACK_WIND (frame, dht_fxattrop_cbk, subvol,
d1681e
+                            subvol->fops->fxattrop, fd, flags, dict, xdata);
d1681e
+
d1681e
+        } else {
d1681e
+                local->xattr_req = xdata ? dict_ref(xdata) : dict_new ();
d1681e
+                local->call_cnt = 1;
d1681e
+
d1681e
+                local->rebalance.xattr = dict_ref (dict);
d1681e
+                local->rebalance.flags = flags;
d1681e
+
d1681e
+                ret = dht_request_iatt_in_xdata (this, local->xattr_req);
d1681e
+
d1681e
+                if (ret) {
d1681e
+                        gf_msg_debug (this->name, 0,
d1681e
+                                      "Failed to set dictionary key %s fd=%p",
d1681e
+                                      DHT_IATT_IN_XDATA_KEY, fd);
d1681e
+                }
d1681e
+
d1681e
+                STACK_WIND (frame, dht_common_xattrop_cbk, subvol,
d1681e
+                            subvol->fops->fxattrop, fd,
d1681e
+                            local->rebalance.flags, local->rebalance.xattr,
d1681e
+                            local->xattr_req);
d1681e
+        }
d1681e
 
d1681e
         return 0;
d1681e
 
d1681e
@@ -1345,6 +1550,9 @@ err:
d1681e
         return 0;
d1681e
 }
d1681e
 
d1681e
+/* Currently no translators on top of 'distribute' will be using
d1681e
+ * below fops, hence not implementing 'migration' related checks
d1681e
+ */
d1681e
 
d1681e
 int
d1681e
 dht_inodelk_cbk (call_frame_t *frame, void *cookie,
d1681e
@@ -1406,7 +1614,6 @@ dht_finodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
d1681e
                   int32_t op_ret, int32_t op_errno, dict_t *xdata)
d1681e
 
d1681e
 {
d1681e
-
d1681e
         dht_lk_inode_unref (frame, op_ret);
d1681e
         DHT_STACK_UNWIND (finodelk, frame, op_ret, op_errno, xdata);
d1681e
         return 0;
d1681e
diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c
d1681e
index ae367d7..3343a2b 100644
d1681e
--- a/xlators/cluster/dht/src/dht-rebalance.c
d1681e
+++ b/xlators/cluster/dht/src/dht-rebalance.c
d1681e
@@ -168,7 +168,7 @@ dht_strip_out_acls (dict_t *dict)
d1681e
 {
d1681e
         if (dict) {
d1681e
                 dict_del (dict, "trusted.SGI_ACL_FILE");
d1681e
-                dict_del (dict, "POSIX_ACL_ACCESS_XATTR");
d1681e
+                dict_del (dict, POSIX_ACL_ACCESS_XATTR);
d1681e
         }
d1681e
 }
d1681e
 
d1681e
@@ -665,7 +665,7 @@ out:
d1681e
 static int
d1681e
 __dht_rebalance_create_dst_file (xlator_t *this, xlator_t *to, xlator_t *from,
d1681e
                                  loc_t *loc, struct iatt *stbuf, fd_t **dst_fd,
d1681e
-                                 dict_t *xattr, int *fop_errno)
d1681e
+                                 int *fop_errno)
d1681e
 {
d1681e
         int          ret  = -1;
d1681e
         fd_t        *fd   = NULL;
d1681e
@@ -810,28 +810,6 @@ __dht_rebalance_create_dst_file (xlator_t *this, xlator_t *to, xlator_t *from,
d1681e
                 goto out;
d1681e
         }
d1681e
 
d1681e
-        ret = syncop_fsetxattr (to, fd, xattr, 0, NULL, NULL);
d1681e
-        if (ret < 0) {
d1681e
-                *fop_errno = -ret;
d1681e
-                gf_msg (this->name, GF_LOG_WARNING, -ret,
d1681e
-                        DHT_MSG_MIGRATE_FILE_FAILED,
d1681e
-                        "%s: failed to set xattr on %s",
d1681e
-                        loc->path, to->name);
d1681e
-
d1681e
-        }
d1681e
-
d1681e
-        /* TODO: Need to add a detailed comment about why we moved away from
d1681e
-        ftruncate.
d1681e
-
d1681e
-        ret = syncop_ftruncate (to, fd, stbuf->ia_size, NULL, NULL);
d1681e
-        if (ret < 0) {
d1681e
-                *fop_errno = -ret;
d1681e
-                gf_msg (this->name, GF_LOG_ERROR, -ret,
d1681e
-                        DHT_MSG_MIGRATE_FILE_FAILED,
d1681e
-                        "ftruncate failed for %s on %s",
d1681e
-                        loc->path, to->name);
d1681e
-        */
d1681e
-
d1681e
         ret = syncop_fsetattr (to, fd, stbuf,
d1681e
                                (GF_SET_ATTR_UID | GF_SET_ATTR_GID),
d1681e
                                 NULL, NULL, NULL, NULL);
d1681e
@@ -1620,24 +1598,10 @@ dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,
d1681e
         }
d1681e
 
d1681e
 
d1681e
-        /* TODO: move all xattr related operations to fd based operations */
d1681e
-        ret = syncop_listxattr (from, loc, &xattr, NULL, NULL);
d1681e
-        if (ret < 0) {
d1681e
-                *fop_errno = -ret;
d1681e
-                ret = -1;
d1681e
-                gf_msg (this->name, GF_LOG_WARNING, *fop_errno,
d1681e
-                        DHT_MSG_MIGRATE_FILE_FAILED,
d1681e
-                        "Migrate file failed:"
d1681e
-                        "%s: failed to get xattr from %s",
d1681e
-                        loc->path, from->name);
d1681e
-        }
d1681e
-
d1681e
-        /* Copying posix acls to the linkto file messes up the permissions*/
d1681e
-        dht_strip_out_acls (xattr);
d1681e
 
d1681e
         /* create the destination, with required modes/xattr */
d1681e
         ret = __dht_rebalance_create_dst_file (this, to, from, loc, &stbuf,
d1681e
-                                               &dst_fd, xattr, fop_errno);
d1681e
+                                               &dst_fd, fop_errno);
d1681e
         if (ret) {
d1681e
                 gf_msg (this->name, GF_LOG_ERROR, 0, 0, "Create dst failed"
d1681e
                         " on - %s for file - %s", to->name, loc->path);
d1681e
@@ -1683,7 +1647,7 @@ dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,
d1681e
                  * as in case of failure the linkto needs to point to the source
d1681e
                  * subvol */
d1681e
                 ret = __dht_rebalance_create_dst_file (this, to, from, loc, &stbuf,
d1681e
-                                                       &dst_fd, xattr, fop_errno);
d1681e
+                                                       &dst_fd, fop_errno);
d1681e
                 if (ret) {
d1681e
                         gf_log (this->name, GF_LOG_ERROR, "Create dst failed"
d1681e
                                 " on - %s for file - %s", to->name, loc->path);
d1681e
@@ -1709,8 +1673,44 @@ dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,
d1681e
                         loc->path, from->name);
d1681e
                 goto out;
d1681e
         }
d1681e
+
d1681e
+        /* TODO: move all xattr related operations to fd based operations */
d1681e
+        ret = syncop_listxattr (from, loc, &xattr, NULL, NULL);
d1681e
+        if (ret < 0) {
d1681e
+                *fop_errno = -ret;
d1681e
+                gf_msg (this->name, GF_LOG_WARNING, *fop_errno,
d1681e
+                        DHT_MSG_MIGRATE_FILE_FAILED,
d1681e
+                        "Migrate file failed:"
d1681e
+                        "%s: failed to get xattr from %s",
d1681e
+                        loc->path, from->name);
d1681e
+                ret = -1;
d1681e
+                goto out;
d1681e
+        }
d1681e
+
d1681e
+        /* Copying posix acls to the linkto file messes up the permissions*/
d1681e
+        dht_strip_out_acls (xattr);
d1681e
+
d1681e
+        /* Remove the linkto xattr as we don't want to overwrite the value
d1681e
+         * set on the dst.
d1681e
+         */
d1681e
+        dict_del (xattr, conf->link_xattr_name);
d1681e
+
d1681e
+        /* We need to error out if this fails as having the wrong shard xattrs
d1681e
+         * set on the dst could cause data corruption
d1681e
+         */
d1681e
+        ret = syncop_fsetxattr (to, dst_fd, xattr, 0, NULL, NULL);
d1681e
+        if (ret < 0) {
d1681e
+                *fop_errno = -ret;
d1681e
+                gf_msg (this->name, GF_LOG_WARNING, -ret,
d1681e
+                        DHT_MSG_MIGRATE_FILE_FAILED,
d1681e
+                        "%s: failed to set xattr on %s",
d1681e
+                        loc->path, to->name);
d1681e
+                ret = -1;
d1681e
+                goto out;
d1681e
+        }
d1681e
+
d1681e
         if (xattr_rsp) {
d1681e
-                /* we no more require this key */
d1681e
+                /* we no longer require this key */
d1681e
                 dict_del (dict, conf->link_xattr_name);
d1681e
                 dict_unref (xattr_rsp);
d1681e
         }
d1681e
@@ -2011,7 +2011,9 @@ dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,
d1681e
                 xattr = NULL;
d1681e
         }
d1681e
 
d1681e
-        ret = syncop_listxattr (from, loc, &xattr, NULL, NULL);
d1681e
+        /* Set only the Posix ACLs this time */
d1681e
+        ret = syncop_getxattr (from, loc, &xattr, POSIX_ACL_ACCESS_XATTR,
d1681e
+                               NULL, NULL);
d1681e
         if (ret < 0) {
d1681e
                 gf_msg (this->name, GF_LOG_WARNING, -ret,
d1681e
                         DHT_MSG_MIGRATE_FILE_FAILED,
d1681e
diff --git a/xlators/cluster/dht/src/dht-selfheal.c b/xlators/cluster/dht/src/dht-selfheal.c
d1681e
index 1577d03..3b9fcf1 100644
d1681e
--- a/xlators/cluster/dht/src/dht-selfheal.c
d1681e
+++ b/xlators/cluster/dht/src/dht-selfheal.c
d1681e
@@ -14,7 +14,6 @@
d1681e
 #include "dht-common.h"
d1681e
 #include "dht-messages.h"
d1681e
 #include "dht-lock.h"
d1681e
-#include "glusterfs-acl.h"
d1681e
 
d1681e
 #define DHT_SET_LAYOUT_RANGE(layout,i,srt,chunk,path)    do {           \
d1681e
                 layout->list[i].start = srt;                            \
d1681e
diff --git a/xlators/storage/posix/src/posix-helpers.c b/xlators/storage/posix/src/posix-helpers.c
d1681e
index f8d8fed..bc97206 100644
d1681e
--- a/xlators/storage/posix/src/posix-helpers.c
d1681e
+++ b/xlators/storage/posix/src/posix-helpers.c
d1681e
@@ -150,6 +150,37 @@ out:
d1681e
         return ret;
d1681e
 }
d1681e
 
d1681e
+int32_t
d1681e
+posix_set_mode_in_dict (dict_t *in_dict, dict_t *out_dict,
d1681e
+                        struct iatt *in_stbuf)
d1681e
+{
d1681e
+        mode_t file_mode = 0;
d1681e
+
d1681e
+        /* All three arguments are mandatory */
d1681e
+        if (!in_dict || !in_stbuf || !out_dict)
d1681e
+                return -1;
d1681e
+
d1681e
+        /* The mode is reported only for regular files; anything
d1681e
+         * else is treated as success with nothing added.
d1681e
+         */
d1681e
+        if (!IA_ISREG (in_stbuf->ia_type))
d1681e
+                return 0;
d1681e
+
d1681e
+        /* Skip unless the request dict carries
d1681e
+         * DHT_MODE_IN_XDATA_KEY, i.e. the caller asked for the mode.
d1681e
+         */
d1681e
+        if (dict_get (in_dict, DHT_MODE_IN_XDATA_KEY) == NULL)
d1681e
+                return 0;
d1681e
+
d1681e
+        /* Rebuild the st_mode value from the iatt's permission
d1681e
+         * bits and file type and hand it back in the reply dict.
d1681e
+         */
d1681e
+        file_mode = st_mode_from_ia (in_stbuf->ia_prot, in_stbuf->ia_type);
d1681e
+
d1681e
+        return dict_set_int32 (out_dict, DHT_MODE_IN_XDATA_KEY, file_mode);
d1681e
+}
d1681e
+
d1681e
+
d1681e
 static gf_boolean_t
d1681e
 posix_xattr_ignorable (char *key)
d1681e
 {
d1681e
diff --git a/xlators/storage/posix/src/posix.c b/xlators/storage/posix/src/posix.c
d1681e
index 8aeca3b..a412e6d 100644
d1681e
--- a/xlators/storage/posix/src/posix.c
d1681e
+++ b/xlators/storage/posix/src/posix.c
d1681e
@@ -6146,7 +6146,9 @@ do_xattrop (call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd,
d1681e
         if (!xdata_rsp) {
d1681e
                 op_ret = -1;
d1681e
                 op_errno = ENOMEM;
d1681e
+                goto out;
d1681e
         }
d1681e
+        posix_set_mode_in_dict (xdata, xdata_rsp, &stbuf);
d1681e
 out:
d1681e
 
d1681e
         STACK_UNWIND_STRICT (xattrop, frame, op_ret, op_errno, xattr_rsp,
d1681e
diff --git a/xlators/storage/posix/src/posix.h b/xlators/storage/posix/src/posix.h
d1681e
index ae9fb08..8e40e6f 100644
d1681e
--- a/xlators/storage/posix/src/posix.h
d1681e
+++ b/xlators/storage/posix/src/posix.h
d1681e
@@ -353,4 +353,8 @@ posix_fdget_objectsignature (int, dict_t *);
d1681e
 
d1681e
 gf_boolean_t
d1681e
 posix_is_bulk_removexattr (char *name, dict_t *dict);
d1681e
+
d1681e
+int32_t
d1681e
+posix_set_mode_in_dict (dict_t *in_dict, dict_t *out_dict,
d1681e
+                        struct iatt *in_stbuf);
d1681e
 #endif /* _POSIX_H */
d1681e
-- 
d1681e
1.8.3.1
d1681e