e7a346
From 4f5197f585ce4117e29e6b6af0e6d91c19eb34ea Mon Sep 17 00:00:00 2001
e7a346
From: N Balachandran <nbalacha@redhat.com>
e7a346
Date: Wed, 3 Jan 2018 10:36:58 +0530
e7a346
Subject: [PATCH 142/148] cluster/dht: Add migration checks to dht_(f)xattrop
e7a346
e7a346
The dht_(f)xattrop implementation did not perform
e7a346
migration phase1/phase2 checks, which could cause issues
e7a346
with rebalance on sharded volumes.
e7a346
This does not solve the issue where fops may reach the target
e7a346
out of order.
e7a346
e7a346
upstream : https://review.gluster.org/#/c/17776
e7a346
e7a346
> Change-Id: I2416fc35115e60659e35b4b717fd51f20746586c
e7a346
> BUG: 1471031
e7a346
> Signed-off-by: N Balachandran <nbalacha@redhat.com>
e7a346
e7a346
Change-Id: I95b453421809c543ba8e4febd9a12c84e9439a29
e7a346
BUG: 1530146
e7a346
Signed-off-by: N Balachandran <nbalacha@redhat.com>
e7a346
Reviewed-on: https://code.engineering.redhat.com/gerrit/126959
e7a346
Tested-by: RHGS Build Bot <nigelb@redhat.com>
e7a346
Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
e7a346
---
e7a346
 libglusterfs/src/glusterfs.h              |   1 +
e7a346
 xlators/cluster/dht/src/dht-common.c      |  48 +++++-
e7a346
 xlators/cluster/dht/src/dht-common.h      |  10 ++
e7a346
 xlators/cluster/dht/src/dht-helper.c      |   3 +
e7a346
 xlators/cluster/dht/src/dht-inode-read.c  | 241 +++++++++++++++++++++++++++---
e7a346
 xlators/cluster/dht/src/dht-rebalance.c   |  86 +++++------
e7a346
 xlators/cluster/dht/src/dht-selfheal.c    |   1 -
e7a346
 xlators/storage/posix/src/posix-helpers.c |  31 ++++
e7a346
 xlators/storage/posix/src/posix.c         |   2 +
e7a346
 xlators/storage/posix/src/posix.h         |   4 +
e7a346
 10 files changed, 366 insertions(+), 61 deletions(-)
e7a346
e7a346
diff --git a/libglusterfs/src/glusterfs.h b/libglusterfs/src/glusterfs.h
e7a346
index 18256aa..c8835d9 100644
e7a346
--- a/libglusterfs/src/glusterfs.h
e7a346
+++ b/libglusterfs/src/glusterfs.h
e7a346
@@ -272,6 +272,7 @@
e7a346
 #define TIER_LINKFILE_GFID           "tier-linkfile-gfid"
e7a346
 #define DHT_SKIP_OPEN_FD_UNLINK     "dont-unlink-for-open-fd"
e7a346
 #define DHT_IATT_IN_XDATA_KEY       "dht-get-iatt-in-xattr"
e7a346
+#define DHT_MODE_IN_XDATA_KEY       "dht-get-mode-in-xattr"
e7a346
 #define GET_LINK_COUNT              "get-link-count"
e7a346
 #define GF_GET_SIZE                 "get-size"
e7a346
 
e7a346
diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c
e7a346
index b55cb36..c2d0827 100644
e7a346
--- a/xlators/cluster/dht/src/dht-common.c
e7a346
+++ b/xlators/cluster/dht/src/dht-common.c
e7a346
@@ -18,7 +18,6 @@
e7a346
 #include "dht-lock.h"
e7a346
 #include "defaults.h"
e7a346
 #include "byte-order.h"
e7a346
-#include "glusterfs-acl.h"
e7a346
 #include "quota-common-utils.h"
e7a346
 #include "upcall-utils.h"
e7a346
 
e7a346
@@ -46,6 +45,11 @@ int
e7a346
 dht_rmdir_readdirp_do (call_frame_t *readdirp_frame, xlator_t *this);
e7a346
 
e7a346
 
e7a346
+int
e7a346
+dht_common_xattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
e7a346
+                        int32_t op_ret, int32_t op_errno, dict_t *dict,
e7a346
+                        dict_t *xdata);
e7a346
+
e7a346
 
e7a346
 /* Sets the blocks and size values to fixed values. This is to be called
e7a346
  * only for dirs. The caller is responsible for checking the type
e7a346
@@ -61,6 +65,48 @@ int32_t dht_set_fixed_dir_stat (struct iatt *stat)
e7a346
 }
e7a346
 
e7a346
 
e7a346
+/* Set both DHT_IATT_IN_XDATA_KEY and DHT_MODE_IN_XDATA_KEY
e7a346
+ * Use DHT_MODE_IN_XDATA_KEY if available. Else fall back to
e7a346
+ * DHT_IATT_IN_XDATA_KEY
e7a346
+ */
e7a346
+int dht_request_iatt_in_xdata (xlator_t *this, dict_t *xattr_req)
e7a346
+{
e7a346
+        int ret = -1;
e7a346
+
e7a346
+        ret = dict_set_int8 (xattr_req, DHT_MODE_IN_XDATA_KEY, 1);
e7a346
+        ret = dict_set_int8 (xattr_req, DHT_IATT_IN_XDATA_KEY, 1);
e7a346
+
e7a346
+        /* NOTE: ret is the result of the DHT_IATT_IN_XDATA_KEY set only */
e7a346
+        return ret;
e7a346
+}
e7a346
+
e7a346
+
e7a346
+/* Get both DHT_IATT_IN_XDATA_KEY and DHT_MODE_IN_XDATA_KEY
e7a346
+ * Use DHT_MODE_IN_XDATA_KEY if available, else fall back to
e7a346
+ * DHT_IATT_IN_XDATA_KEY (NOTE(review): this fallback appears to rebind only
e7a346
+ * the local stbuf pointer). The mode path sets only ia_prot/ia_type in *stbuf.
e7a346
+ */
e7a346
+int dht_read_iatt_from_xdata (xlator_t *this, dict_t *xdata,
e7a346
+                              struct iatt *stbuf)
e7a346
+{
e7a346
+        int ret = -1;
e7a346
+        int32_t mode = 0;
e7a346
+
e7a346
+        ret = dict_get_int32 (xdata, DHT_MODE_IN_XDATA_KEY, &mode);
e7a346
+
e7a346
+        if (ret) {
e7a346
+                ret = dict_get_bin (xdata, DHT_IATT_IN_XDATA_KEY,
e7a346
+                                    (void **)&stbuf);
e7a346
+        } else {
e7a346
+                stbuf->ia_prot = ia_prot_from_st_mode (mode);
e7a346
+                stbuf->ia_type = ia_type_from_st_mode (mode);
e7a346
+        }
e7a346
+
e7a346
+        return ret;
e7a346
+}
e7a346
+
e7a346
+
e7a346
+
e7a346
 int
e7a346
 dht_rmdir_unlock (call_frame_t *frame, xlator_t *this);
e7a346
 
e7a346
diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h
e7a346
index e2afd6c..47a2e23 100644
e7a346
--- a/xlators/cluster/dht/src/dht-common.h
e7a346
+++ b/xlators/cluster/dht/src/dht-common.h
e7a346
@@ -20,6 +20,7 @@
e7a346
 #include "refcount.h"
e7a346
 #include "timer.h"
e7a346
 #include "protocol-common.h"
e7a346
+#include "glusterfs-acl.h"
e7a346
 
e7a346
 #ifndef _DHT_H
e7a346
 #define _DHT_H
e7a346
@@ -146,6 +147,7 @@ struct dht_rebalance_ {
e7a346
         dht_defrag_cbk_fn_t  target_op_fn;
e7a346
         dict_t              *xdata;
e7a346
         dict_t              *xattr;
e7a346
+        dict_t              *dict;
e7a346
         int32_t              set;
e7a346
         struct gf_flock      flock;
e7a346
         int                  lock_cmd;
e7a346
@@ -1416,4 +1418,12 @@ dht_file_removexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
e7a346
 int
e7a346
 dht_file_setxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
e7a346
                        int op_ret, int op_errno, dict_t *xdata);
e7a346
+
e7a346
+/* Helpers to request and read the DHT iatt/mode keys carried in xdata */
e7a346
+
e7a346
+
e7a346
+int dht_request_iatt_in_xdata (xlator_t *this, dict_t *xattr_req);
e7a346
+
e7a346
+int dht_read_iatt_from_xdata (xlator_t *this, dict_t *xdata,
e7a346
+                              struct iatt *stbuf);
e7a346
 #endif/* _DHT_H */
e7a346
diff --git a/xlators/cluster/dht/src/dht-helper.c b/xlators/cluster/dht/src/dht-helper.c
e7a346
index cca2bfe..e56a085 100644
e7a346
--- a/xlators/cluster/dht/src/dht-helper.c
e7a346
+++ b/xlators/cluster/dht/src/dht-helper.c
e7a346
@@ -797,6 +797,9 @@ dht_local_wipe (xlator_t *this, dht_local_t *local)
e7a346
         if (local->rebalance.xattr)
e7a346
                 dict_unref (local->rebalance.xattr);
e7a346
 
e7a346
+        if (local->rebalance.dict)
e7a346
+                dict_unref (local->rebalance.dict);
e7a346
+
e7a346
         GF_FREE (local->rebalance.vector);
e7a346
 
e7a346
         if (local->rebalance.iobref)
e7a346
diff --git a/xlators/cluster/dht/src/dht-inode-read.c b/xlators/cluster/dht/src/dht-inode-read.c
e7a346
index a9e4766..fa63fef 100644
e7a346
--- a/xlators/cluster/dht/src/dht-inode-read.c
e7a346
+++ b/xlators/cluster/dht/src/dht-inode-read.c
e7a346
@@ -24,8 +24,9 @@ int dht_lk2 (xlator_t *this, xlator_t *dst_node,
e7a346
              call_frame_t *frame, int ret);
e7a346
 int dht_fsync2 (xlator_t *this, xlator_t *dst_node,
e7a346
                 call_frame_t *frame, int ret);
e7a346
-
e7a346
-
e7a346
+int
e7a346
+dht_common_xattrop2 (xlator_t *this, xlator_t *subvol, call_frame_t *frame,
e7a346
+                     int ret);
e7a346
 
e7a346
 int
e7a346
 dht_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
e7a346
@@ -1246,13 +1247,163 @@ err:
e7a346
         return 0;
e7a346
 }
e7a346
 
e7a346
-/* Currently no translators on top of 'distribute' will be using
e7a346
- * below fops, hence not implementing 'migration' related checks
e7a346
- */
e7a346
+
e7a346
+int
e7a346
+dht_common_xattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
e7a346
+                        int32_t op_ret, int32_t op_errno, dict_t *dict,
e7a346
+                        dict_t *xdata)
e7a346
+{
e7a346
+        dht_local_t  *local          = NULL;
e7a346
+        call_frame_t *call_frame     = NULL;
e7a346
+        xlator_t     *prev           = NULL;
e7a346
+        xlator_t     *src_subvol     = NULL;
e7a346
+        xlator_t     *dst_subvol     = NULL;
e7a346
+        struct iatt   stbuf          = {0,};
e7a346
+        int           ret            = -1;
e7a346
+        inode_t      *inode          = NULL;
e7a346
+
e7a346
+        local = frame->local;
e7a346
+        call_frame = cookie;
e7a346
+        prev = call_frame->this;
e7a346
+
e7a346
+        local->op_errno = op_errno;
e7a346
+
e7a346
+        if ((op_ret == -1) && !dht_inode_missing (op_errno)) {
e7a346
+                gf_msg_debug (this->name, op_errno,
e7a346
+                              "subvolume %s returned -1.",
e7a346
+                              prev->name);
e7a346
+                goto out;
e7a346
+        }
e7a346
+
e7a346
+        if (local->call_cnt != 1)
e7a346
+                goto out;
e7a346
+
e7a346
+        ret = dht_read_iatt_from_xdata (this, xdata, &stbuf);
e7a346
+
e7a346
+        if ((!op_ret) && (ret)) {
e7a346
+                /* This is a potential problem and can cause corruption
e7a346
+                 * with sharding.
e7a346
+                 * No mode/iatt came back in xdata; skip the checks and unwind.
e7a346
+                 */
e7a346
+                goto out;
e7a346
+        }
e7a346
+
e7a346
+        local->op_ret = op_ret;
e7a346
+        local->rebalance.target_op_fn = dht_common_xattrop2;
e7a346
+        if (xdata)
e7a346
+                local->rebalance.xdata = dict_ref (xdata);
e7a346
+
e7a346
+        if (dict)
e7a346
+                local->rebalance.dict = dict_ref (dict);
e7a346
+
e7a346
+        /* Phase 2 of migration */
e7a346
+        if ((op_ret == -1) || IS_DHT_MIGRATION_PHASE2 (&stbuf)) {
e7a346
+                ret = dht_rebalance_complete_check (this, frame);
e7a346
+                if (!ret)
e7a346
+                        return 0;
e7a346
+        }
e7a346
+
e7a346
+        /* Check if the rebalance phase1 is true */
e7a346
+        if (IS_DHT_MIGRATION_PHASE1 (&stbuf)) {
e7a346
+
e7a346
+                inode = local->loc.inode ? local->loc.inode : local->fd->inode;
e7a346
+                dht_inode_ctx_get_mig_info (this, inode, &src_subvol,
e7a346
+                                            &dst_subvol);
e7a346
+
e7a346
+                if (dht_mig_info_is_invalid (local->cached_subvol, src_subvol,
e7a346
+                                             dst_subvol) ||
e7a346
+                      !dht_fd_open_on_dst (this, local->fd, dst_subvol)) {
e7a346
+
e7a346
+                        ret = dht_rebalance_in_progress_check (this, frame);
e7a346
+                        if (!ret)
e7a346
+                                return 0;
e7a346
+                } else {
e7a346
+                        dht_common_xattrop2 (this, dst_subvol, frame, 0);
e7a346
+                        return 0;
e7a346
+                }
e7a346
+        }
e7a346
+
e7a346
+
e7a346
+out:
e7a346
+        if (local->fop == GF_FOP_XATTROP) {
e7a346
+                DHT_STACK_UNWIND (xattrop, frame, op_ret, op_errno,
e7a346
+                                  dict, xdata);
e7a346
+        } else {
e7a346
+                DHT_STACK_UNWIND (fxattrop, frame, op_ret, op_errno,
e7a346
+                                  dict, xdata);
e7a346
+        }
e7a346
+
e7a346
+        return 0;
e7a346
+}
e7a346
+
e7a346
+
e7a346
+int
e7a346
+dht_common_xattrop2 (xlator_t *this, xlator_t *subvol, call_frame_t *frame,
e7a346
+                     int ret)
e7a346
+{
e7a346
+        dht_local_t *local    = NULL;
e7a346
+        int32_t      op_errno = EINVAL;
e7a346
+
e7a346
+        if ((frame == NULL) || (frame->local == NULL))
e7a346
+                goto out;
e7a346
+
e7a346
+        local = frame->local;
e7a346
+        op_errno = local->op_errno;
e7a346
+
e7a346
+        if (we_are_not_migrating (ret)) {
e7a346
+                /* This dht xlator is not migrating the file. Unwind and
e7a346
+                 * pass on the original mode bits so the higher DHT layer
e7a346
+                 * can handle this.
e7a346
+                 */
e7a346
+                if (local->fop == GF_FOP_XATTROP) {
e7a346
+                        DHT_STACK_UNWIND (xattrop, frame, local->op_ret,
e7a346
+                                          op_errno, local->rebalance.dict,
e7a346
+                                          local->rebalance.xdata);
e7a346
+                } else {
e7a346
+                        DHT_STACK_UNWIND (fxattrop, frame, local->op_ret,
e7a346
+                                          op_errno, local->rebalance.dict,
e7a346
+                                          local->rebalance.xdata);
e7a346
+                }
e7a346
+
e7a346
+                return 0;
e7a346
+        }
e7a346
+
e7a346
+        if (subvol == NULL)
e7a346
+                goto out;
e7a346
+
e7a346
+        local->call_cnt = 2; /* This is the second attempt */
e7a346
+
e7a346
+        if (local->fop == GF_FOP_XATTROP) {
e7a346
+                STACK_WIND (frame, dht_common_xattrop_cbk, subvol,
e7a346
+                            subvol->fops->xattrop, &local->loc,
e7a346
+                            local->rebalance.flags, local->rebalance.xattr,
e7a346
+                            local->xattr_req);
e7a346
+        } else {
e7a346
+                STACK_WIND (frame, dht_common_xattrop_cbk, subvol,
e7a346
+                            subvol->fops->fxattrop, local->fd,
e7a346
+                            local->rebalance.flags, local->rebalance.xattr,
e7a346
+                            local->xattr_req);
e7a346
+        }
e7a346
+
e7a346
+        return 0;
e7a346
+
e7a346
+out:
e7a346
+
e7a346
+        /* If local is unavailable we could be unwinding the wrong
e7a346
+         * function here */
e7a346
+
e7a346
+        if (local && (local->fop == GF_FOP_XATTROP)) {
e7a346
+                DHT_STACK_UNWIND (xattrop, frame, -1, op_errno, NULL, NULL);
e7a346
+        } else {
e7a346
+                DHT_STACK_UNWIND (fxattrop, frame, -1, op_errno, NULL, NULL);
e7a346
+        }
e7a346
+        return 0;
e7a346
+}
e7a346
+
e7a346
 
e7a346
 int
e7a346
 dht_xattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
e7a346
-                 int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata)
e7a346
+                  int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata)
e7a346
 {
e7a346
         DHT_STACK_UNWIND (xattrop, frame, op_ret, op_errno, dict, xdata);
e7a346
         return 0;
e7a346
@@ -1263,9 +1414,10 @@ int
e7a346
 dht_xattrop (call_frame_t *frame, xlator_t *this, loc_t *loc,
e7a346
              gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata)
e7a346
 {
e7a346
-        xlator_t     *subvol = NULL;
e7a346
+        xlator_t     *subvol   = NULL;
e7a346
         int           op_errno = -1;
e7a346
-        dht_local_t  *local = NULL;
e7a346
+        dht_local_t  *local    = NULL;
e7a346
+        int           ret      = -1;
e7a346
 
e7a346
         VALIDATE_OR_GOTO (frame, err);
e7a346
         VALIDATE_OR_GOTO (this, err);
e7a346
@@ -1287,11 +1439,33 @@ dht_xattrop (call_frame_t *frame, xlator_t *this, loc_t *loc,
e7a346
                 goto err;
e7a346
         }
e7a346
 
e7a346
-        local->call_cnt = 1;
e7a346
+        /* TODO: Handle dirs as well. At the moment the only xlator above dht
e7a346
+         * that uses xattrop is sharding and that is only for files */
e7a346
+
e7a346
+        if (IA_ISDIR (loc->inode->ia_type)) {
e7a346
+                STACK_WIND (frame, dht_xattrop_cbk, subvol,
e7a346
+                            subvol->fops->xattrop, loc, flags, dict, xdata);
e7a346
+
e7a346
+        } else {
e7a346
+                local->xattr_req = xdata ? dict_ref(xdata) : dict_new ();
e7a346
+                local->call_cnt = 1;
e7a346
 
e7a346
-        STACK_WIND (frame, dht_xattrop_cbk,
e7a346
-                    subvol, subvol->fops->xattrop,
e7a346
-                    loc, flags, dict, xdata);
e7a346
+                local->rebalance.xattr = dict_ref (dict);
e7a346
+                local->rebalance.flags = flags;
e7a346
+
e7a346
+                ret = dht_request_iatt_in_xdata (this, local->xattr_req);
e7a346
+
e7a346
+                if (ret) {
e7a346
+                        gf_msg_debug (this->name, 0,
e7a346
+                                      "Failed to set dictionary key %s file=%s",
e7a346
+                                      DHT_IATT_IN_XDATA_KEY, loc->path);
e7a346
+                }
e7a346
+
e7a346
+                STACK_WIND (frame, dht_common_xattrop_cbk, subvol,
e7a346
+                            subvol->fops->xattrop, loc,
e7a346
+                            local->rebalance.flags, local->rebalance.xattr,
e7a346
+                            local->xattr_req);
e7a346
+        }
e7a346
 
e7a346
         return 0;
e7a346
 
e7a346
@@ -1318,6 +1492,8 @@ dht_fxattrop (call_frame_t *frame, xlator_t *this,
e7a346
 {
e7a346
         xlator_t     *subvol = NULL;
e7a346
         int           op_errno = -1;
e7a346
+        dht_local_t  *local    = NULL;
e7a346
+        int           ret      = -1;
e7a346
 
e7a346
         VALIDATE_OR_GOTO (frame, err);
e7a346
         VALIDATE_OR_GOTO (this, err);
e7a346
@@ -1331,10 +1507,39 @@ dht_fxattrop (call_frame_t *frame, xlator_t *this,
e7a346
                 goto err;
e7a346
         }
e7a346
 
e7a346
-        STACK_WIND (frame,
e7a346
-                    dht_fxattrop_cbk,
e7a346
-                    subvol, subvol->fops->fxattrop,
e7a346
-                    fd, flags, dict, xdata);
e7a346
+        local = dht_local_init (frame, NULL, fd, GF_FOP_FXATTROP);
e7a346
+        if (!local) {
e7a346
+                op_errno = ENOMEM;
e7a346
+                goto err;
e7a346
+        }
e7a346
+
e7a346
+        /* TODO: Handle dirs as well. At the moment the only xlator above dht
e7a346
+         * that uses xattrop is sharding and that is only for files */
e7a346
+
e7a346
+        if (IA_ISDIR (fd->inode->ia_type)) {
e7a346
+                STACK_WIND (frame, dht_fxattrop_cbk, subvol,
e7a346
+                            subvol->fops->fxattrop, fd, flags, dict, xdata);
e7a346
+
e7a346
+        } else {
e7a346
+                local->xattr_req = xdata ? dict_ref(xdata) : dict_new ();
e7a346
+                local->call_cnt = 1;
e7a346
+
e7a346
+                local->rebalance.xattr = dict_ref (dict);
e7a346
+                local->rebalance.flags = flags;
e7a346
+
e7a346
+                ret = dht_request_iatt_in_xdata (this, local->xattr_req);
e7a346
+
e7a346
+                if (ret) {
e7a346
+                        gf_msg_debug (this->name, 0,
e7a346
+                                      "Failed to set dictionary key %s fd=%p",
e7a346
+                                      DHT_IATT_IN_XDATA_KEY, fd);
e7a346
+                }
e7a346
+
e7a346
+                STACK_WIND (frame, dht_common_xattrop_cbk, subvol,
e7a346
+                            subvol->fops->fxattrop, fd,
e7a346
+                            local->rebalance.flags, local->rebalance.xattr,
e7a346
+                            local->xattr_req);
e7a346
+        }
e7a346
 
e7a346
         return 0;
e7a346
 
e7a346
@@ -1345,6 +1550,9 @@ err:
e7a346
         return 0;
e7a346
 }
e7a346
 
e7a346
+/* Currently no translators on top of 'distribute' will be using
e7a346
+ * the fops below, hence 'migration' related checks are not implemented
e7a346
+ */
e7a346
 
e7a346
 int
e7a346
 dht_inodelk_cbk (call_frame_t *frame, void *cookie,
e7a346
@@ -1406,7 +1614,6 @@ dht_finodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
e7a346
                   int32_t op_ret, int32_t op_errno, dict_t *xdata)
e7a346
 
e7a346
 {
e7a346
-
e7a346
         dht_lk_inode_unref (frame, op_ret);
e7a346
         DHT_STACK_UNWIND (finodelk, frame, op_ret, op_errno, xdata);
e7a346
         return 0;
e7a346
diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c
e7a346
index ae367d7..3343a2b 100644
e7a346
--- a/xlators/cluster/dht/src/dht-rebalance.c
e7a346
+++ b/xlators/cluster/dht/src/dht-rebalance.c
e7a346
@@ -168,7 +168,7 @@ dht_strip_out_acls (dict_t *dict)
e7a346
 {
e7a346
         if (dict) {
e7a346
                 dict_del (dict, "trusted.SGI_ACL_FILE");
e7a346
-                dict_del (dict, "POSIX_ACL_ACCESS_XATTR");
e7a346
+                dict_del (dict, POSIX_ACL_ACCESS_XATTR);
e7a346
         }
e7a346
 }
e7a346
 
e7a346
@@ -665,7 +665,7 @@ out:
e7a346
 static int
e7a346
 __dht_rebalance_create_dst_file (xlator_t *this, xlator_t *to, xlator_t *from,
e7a346
                                  loc_t *loc, struct iatt *stbuf, fd_t **dst_fd,
e7a346
-                                 dict_t *xattr, int *fop_errno)
e7a346
+                                 int *fop_errno)
e7a346
 {
e7a346
         int          ret  = -1;
e7a346
         fd_t        *fd   = NULL;
e7a346
@@ -810,28 +810,6 @@ __dht_rebalance_create_dst_file (xlator_t *this, xlator_t *to, xlator_t *from,
e7a346
                 goto out;
e7a346
         }
e7a346
 
e7a346
-        ret = syncop_fsetxattr (to, fd, xattr, 0, NULL, NULL);
e7a346
-        if (ret < 0) {
e7a346
-                *fop_errno = -ret;
e7a346
-                gf_msg (this->name, GF_LOG_WARNING, -ret,
e7a346
-                        DHT_MSG_MIGRATE_FILE_FAILED,
e7a346
-                        "%s: failed to set xattr on %s",
e7a346
-                        loc->path, to->name);
e7a346
-
e7a346
-        }
e7a346
-
e7a346
-        /* TODO: Need to add a detailed comment about why we moved away from
e7a346
-        ftruncate.
e7a346
-
e7a346
-        ret = syncop_ftruncate (to, fd, stbuf->ia_size, NULL, NULL);
e7a346
-        if (ret < 0) {
e7a346
-                *fop_errno = -ret;
e7a346
-                gf_msg (this->name, GF_LOG_ERROR, -ret,
e7a346
-                        DHT_MSG_MIGRATE_FILE_FAILED,
e7a346
-                        "ftruncate failed for %s on %s",
e7a346
-                        loc->path, to->name);
e7a346
-        */
e7a346
-
e7a346
         ret = syncop_fsetattr (to, fd, stbuf,
e7a346
                                (GF_SET_ATTR_UID | GF_SET_ATTR_GID),
e7a346
                                 NULL, NULL, NULL, NULL);
e7a346
@@ -1620,24 +1598,10 @@ dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,
e7a346
         }
e7a346
 
e7a346
 
e7a346
-        /* TODO: move all xattr related operations to fd based operations */
e7a346
-        ret = syncop_listxattr (from, loc, &xattr, NULL, NULL);
e7a346
-        if (ret < 0) {
e7a346
-                *fop_errno = -ret;
e7a346
-                ret = -1;
e7a346
-                gf_msg (this->name, GF_LOG_WARNING, *fop_errno,
e7a346
-                        DHT_MSG_MIGRATE_FILE_FAILED,
e7a346
-                        "Migrate file failed:"
e7a346
-                        "%s: failed to get xattr from %s",
e7a346
-                        loc->path, from->name);
e7a346
-        }
e7a346
-
e7a346
-        /* Copying posix acls to the linkto file messes up the permissions*/
e7a346
-        dht_strip_out_acls (xattr);
e7a346
 
e7a346
         /* create the destination, with required modes/xattr */
e7a346
         ret = __dht_rebalance_create_dst_file (this, to, from, loc, &stbuf,
e7a346
-                                               &dst_fd, xattr, fop_errno);
e7a346
+                                               &dst_fd, fop_errno);
e7a346
         if (ret) {
e7a346
                 gf_msg (this->name, GF_LOG_ERROR, 0, 0, "Create dst failed"
e7a346
                         " on - %s for file - %s", to->name, loc->path);
e7a346
@@ -1683,7 +1647,7 @@ dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,
e7a346
                  * as in case of failure the linkto needs to point to the source
e7a346
                  * subvol */
e7a346
                 ret = __dht_rebalance_create_dst_file (this, to, from, loc, &stbuf,
e7a346
-                                                       &dst_fd, xattr, fop_errno);
e7a346
+                                                       &dst_fd, fop_errno);
e7a346
                 if (ret) {
e7a346
                         gf_log (this->name, GF_LOG_ERROR, "Create dst failed"
e7a346
                                 " on - %s for file - %s", to->name, loc->path);
e7a346
@@ -1709,8 +1673,44 @@ dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,
e7a346
                         loc->path, from->name);
e7a346
                 goto out;
e7a346
         }
e7a346
+
e7a346
+        /* TODO: move all xattr related operations to fd based operations */
e7a346
+        ret = syncop_listxattr (from, loc, &xattr, NULL, NULL);
e7a346
+        if (ret < 0) {
e7a346
+                *fop_errno = -ret;
e7a346
+                gf_msg (this->name, GF_LOG_WARNING, *fop_errno,
e7a346
+                        DHT_MSG_MIGRATE_FILE_FAILED,
e7a346
+                        "Migrate file failed:"
e7a346
+                        "%s: failed to get xattr from %s",
e7a346
+                        loc->path, from->name);
e7a346
+                ret = -1;
e7a346
+                goto out;
e7a346
+        }
e7a346
+
e7a346
+        /* Copying posix acls to the linkto file messes up the permissions*/
e7a346
+        dht_strip_out_acls (xattr);
e7a346
+
e7a346
+        /* Remove the linkto xattr as we don't want to overwrite the value
e7a346
+         * set on the dst.
e7a346
+         */
e7a346
+        dict_del (xattr, conf->link_xattr_name);
e7a346
+
e7a346
+        /* We need to error out if this fails as having the wrong shard xattrs
e7a346
+         * set on the dst could cause data corruption
e7a346
+         */
e7a346
+        ret = syncop_fsetxattr (to, dst_fd, xattr, 0, NULL, NULL);
e7a346
+        if (ret < 0) {
e7a346
+                *fop_errno = -ret;
e7a346
+                gf_msg (this->name, GF_LOG_WARNING, -ret,
e7a346
+                        DHT_MSG_MIGRATE_FILE_FAILED,
e7a346
+                        "%s: failed to set xattr on %s",
e7a346
+                        loc->path, to->name);
e7a346
+                ret = -1;
e7a346
+                goto out;
e7a346
+        }
e7a346
+
e7a346
         if (xattr_rsp) {
e7a346
-                /* we no more require this key */
e7a346
+                /* we no longer require this key */
e7a346
                 dict_del (dict, conf->link_xattr_name);
e7a346
                 dict_unref (xattr_rsp);
e7a346
         }
e7a346
@@ -2011,7 +2011,9 @@ dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,
e7a346
                 xattr = NULL;
e7a346
         }
e7a346
 
e7a346
-        ret = syncop_listxattr (from, loc, &xattr, NULL, NULL);
e7a346
+        /* Set only the Posix ACLs this time */
e7a346
+        ret = syncop_getxattr (from, loc, &xattr, POSIX_ACL_ACCESS_XATTR,
e7a346
+                               NULL, NULL);
e7a346
         if (ret < 0) {
e7a346
                 gf_msg (this->name, GF_LOG_WARNING, -ret,
e7a346
                         DHT_MSG_MIGRATE_FILE_FAILED,
e7a346
diff --git a/xlators/cluster/dht/src/dht-selfheal.c b/xlators/cluster/dht/src/dht-selfheal.c
e7a346
index 1577d03..3b9fcf1 100644
e7a346
--- a/xlators/cluster/dht/src/dht-selfheal.c
e7a346
+++ b/xlators/cluster/dht/src/dht-selfheal.c
e7a346
@@ -14,7 +14,6 @@
e7a346
 #include "dht-common.h"
e7a346
 #include "dht-messages.h"
e7a346
 #include "dht-lock.h"
e7a346
-#include "glusterfs-acl.h"
e7a346
 
e7a346
 #define DHT_SET_LAYOUT_RANGE(layout,i,srt,chunk,path)    do {           \
e7a346
                 layout->list[i].start = srt;                            \
e7a346
diff --git a/xlators/storage/posix/src/posix-helpers.c b/xlators/storage/posix/src/posix-helpers.c
e7a346
index f8d8fed..bc97206 100644
e7a346
--- a/xlators/storage/posix/src/posix-helpers.c
e7a346
+++ b/xlators/storage/posix/src/posix-helpers.c
e7a346
@@ -150,6 +150,37 @@ out:
e7a346
         return ret;
e7a346
 }
e7a346
 
e7a346
+int32_t
e7a346
+posix_set_mode_in_dict (dict_t *in_dict, dict_t *out_dict,
e7a346
+                        struct iatt *in_stbuf)
e7a346
+{
e7a346
+        int ret             = -1;
e7a346
+        mode_t mode         = 0;
e7a346
+
e7a346
+        if ((!in_dict) || (!in_stbuf) || (!out_dict)) {
e7a346
+                goto out;
e7a346
+        }
e7a346
+
e7a346
+        /* We need this only for files */
e7a346
+        if (!(IA_ISREG (in_stbuf->ia_type))) {
e7a346
+                ret = 0;
e7a346
+                goto out;
e7a346
+        }
e7a346
+
e7a346
+        /* Nobody asked for this */
e7a346
+        if (!dict_get (in_dict, DHT_MODE_IN_XDATA_KEY)) {
e7a346
+                ret = 0;
e7a346
+                goto out;
e7a346
+        }
e7a346
+        mode = st_mode_from_ia (in_stbuf->ia_prot, in_stbuf->ia_type);
e7a346
+
e7a346
+        ret = dict_set_int32 (out_dict, DHT_MODE_IN_XDATA_KEY, mode);
e7a346
+
e7a346
+out:
e7a346
+        return ret;
e7a346
+}
e7a346
+
e7a346
+
e7a346
 static gf_boolean_t
e7a346
 posix_xattr_ignorable (char *key)
e7a346
 {
e7a346
diff --git a/xlators/storage/posix/src/posix.c b/xlators/storage/posix/src/posix.c
e7a346
index 8aeca3b..a412e6d 100644
e7a346
--- a/xlators/storage/posix/src/posix.c
e7a346
+++ b/xlators/storage/posix/src/posix.c
e7a346
@@ -6146,7 +6146,9 @@ do_xattrop (call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd,
e7a346
         if (!xdata_rsp) {
e7a346
                 op_ret = -1;
e7a346
                 op_errno = ENOMEM;
e7a346
+                goto out;
e7a346
         }
e7a346
+        posix_set_mode_in_dict (xdata, xdata_rsp, &stbuf);
e7a346
 out:
e7a346
 
e7a346
         STACK_UNWIND_STRICT (xattrop, frame, op_ret, op_errno, xattr_rsp,
e7a346
diff --git a/xlators/storage/posix/src/posix.h b/xlators/storage/posix/src/posix.h
e7a346
index ae9fb08..8e40e6f 100644
e7a346
--- a/xlators/storage/posix/src/posix.h
e7a346
+++ b/xlators/storage/posix/src/posix.h
e7a346
@@ -353,4 +353,8 @@ posix_fdget_objectsignature (int, dict_t *);
e7a346
 
e7a346
 gf_boolean_t
e7a346
 posix_is_bulk_removexattr (char *name, dict_t *dict);
e7a346
+
e7a346
+int32_t
e7a346
+posix_set_mode_in_dict (dict_t *in_dict, dict_t *out_dict,
e7a346
+                        struct iatt *in_stbuf);
e7a346
 #endif /* _POSIX_H */
e7a346
-- 
e7a346
1.8.3.1
e7a346