a3470f
From 4f5197f585ce4117e29e6b6af0e6d91c19eb34ea Mon Sep 17 00:00:00 2001
a3470f
From: N Balachandran <nbalacha@redhat.com>
a3470f
Date: Wed, 3 Jan 2018 10:36:58 +0530
a3470f
Subject: [PATCH 142/148] cluster/dht: Add migration checks to dht_(f)xattrop
a3470f
a3470f
The dht_(f)xattrop implementation did not implement
a3470f
migration phase1/phase2 checks which could cause issues
a3470f
with rebalance on sharded volumes.
a3470f
This does not solve the issue where fops may reach the target
a3470f
out of order.
a3470f
a3470f
upstream : https://review.gluster.org/#/c/17776
a3470f
a3470f
> Change-Id: I2416fc35115e60659e35b4b717fd51f20746586c
a3470f
> BUG: 1471031
a3470f
> Signed-off-by: N Balachandran <nbalacha@redhat.com>
a3470f
a3470f
Change-Id: I95b453421809c543ba8e4febd9a12c84e9439a29
a3470f
BUG: 1530146
a3470f
Signed-off-by: N Balachandran <nbalacha@redhat.com>
a3470f
Reviewed-on: https://code.engineering.redhat.com/gerrit/126959
a3470f
Tested-by: RHGS Build Bot <nigelb@redhat.com>
a3470f
Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
a3470f
---
a3470f
 libglusterfs/src/glusterfs.h              |   1 +
a3470f
 xlators/cluster/dht/src/dht-common.c      |  48 +++++-
a3470f
 xlators/cluster/dht/src/dht-common.h      |  10 ++
a3470f
 xlators/cluster/dht/src/dht-helper.c      |   3 +
a3470f
 xlators/cluster/dht/src/dht-inode-read.c  | 241 +++++++++++++++++++++++++++---
a3470f
 xlators/cluster/dht/src/dht-rebalance.c   |  86 +++++------
a3470f
 xlators/cluster/dht/src/dht-selfheal.c    |   1 -
a3470f
 xlators/storage/posix/src/posix-helpers.c |  31 ++++
a3470f
 xlators/storage/posix/src/posix.c         |   2 +
a3470f
 xlators/storage/posix/src/posix.h         |   4 +
a3470f
 10 files changed, 366 insertions(+), 61 deletions(-)
a3470f
a3470f
diff --git a/libglusterfs/src/glusterfs.h b/libglusterfs/src/glusterfs.h
a3470f
index 18256aa..c8835d9 100644
a3470f
--- a/libglusterfs/src/glusterfs.h
a3470f
+++ b/libglusterfs/src/glusterfs.h
a3470f
@@ -272,6 +272,7 @@
a3470f
 #define TIER_LINKFILE_GFID           "tier-linkfile-gfid"
a3470f
 #define DHT_SKIP_OPEN_FD_UNLINK     "dont-unlink-for-open-fd"
a3470f
 #define DHT_IATT_IN_XDATA_KEY       "dht-get-iatt-in-xattr"
a3470f
+#define DHT_MODE_IN_XDATA_KEY       "dht-get-mode-in-xattr"
a3470f
 #define GET_LINK_COUNT              "get-link-count"
a3470f
 #define GF_GET_SIZE                 "get-size"
a3470f
 
a3470f
diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c
a3470f
index b55cb36..c2d0827 100644
a3470f
--- a/xlators/cluster/dht/src/dht-common.c
a3470f
+++ b/xlators/cluster/dht/src/dht-common.c
a3470f
@@ -18,7 +18,6 @@
a3470f
 #include "dht-lock.h"
a3470f
 #include "defaults.h"
a3470f
 #include "byte-order.h"
a3470f
-#include "glusterfs-acl.h"
a3470f
 #include "quota-common-utils.h"
a3470f
 #include "upcall-utils.h"
a3470f
 
a3470f
@@ -46,6 +45,11 @@ int
a3470f
 dht_rmdir_readdirp_do (call_frame_t *readdirp_frame, xlator_t *this);
a3470f
 
a3470f
 
a3470f
+int
a3470f
+dht_common_xattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
a3470f
+                        int32_t op_ret, int32_t op_errno, dict_t *dict,
a3470f
+                        dict_t *xdata);
a3470f
+
a3470f
 
a3470f
 /* Sets the blocks and size values to fixed values. This is to be called
a3470f
  * only for dirs. The caller is responsible for checking the type
a3470f
@@ -61,6 +65,48 @@ int32_t dht_set_fixed_dir_stat (struct iatt *stat)
a3470f
 }
a3470f
 
a3470f
 
a3470f
+/* Request both DHT_IATT_IN_XDATA_KEY and DHT_MODE_IN_XDATA_KEY in
a3470f
+ * xattr_req. The reader uses DHT_MODE_IN_XDATA_KEY if the response
a3470f
+ * carries it and falls back to DHT_IATT_IN_XDATA_KEY otherwise.
a3470f
+ */
a3470f
+int dht_request_iatt_in_xdata (xlator_t *this, dict_t *xattr_req)
a3470f
+{
a3470f
+        int ret = -1;
a3470f
+
a3470f
+        ret = dict_set_int8 (xattr_req, DHT_MODE_IN_XDATA_KEY, 1);
a3470f
+        ret = dict_set_int8 (xattr_req, DHT_IATT_IN_XDATA_KEY, 1);
a3470f
+
a3470f
+        /* Reports only the second set; the first result is overwritten */
a3470f
+        return ret;
a3470f
+}
a3470f
+
a3470f
+
a3470f
+/* Read the mode from xdata: use DHT_MODE_IN_XDATA_KEY if available, else
a3470f
+ * fall back to DHT_IATT_IN_XDATA_KEY. On the mode path only ia_prot and
a3470f
+ * ia_type of stbuf are set. NOTE(review): the fallback passes &stbuf (the
a3470f
+ * local parameter) to dict_get_bin, so the caller's iatt looks unfilled.
a3470f
+ */
a3470f
+int dht_read_iatt_from_xdata (xlator_t *this, dict_t *xdata,
a3470f
+                              struct iatt *stbuf)
a3470f
+{
a3470f
+        int ret = -1;
a3470f
+        int32_t mode = 0;
a3470f
+
a3470f
+        ret = dict_get_int32 (xdata, DHT_MODE_IN_XDATA_KEY, &mode);
a3470f
+
a3470f
+        if (ret) {
a3470f
+                ret = dict_get_bin (xdata, DHT_IATT_IN_XDATA_KEY,
a3470f
+                                    (void **)&stbuf);
a3470f
+        } else {
a3470f
+                stbuf->ia_prot = ia_prot_from_st_mode (mode);
a3470f
+                stbuf->ia_type = ia_type_from_st_mode (mode);
a3470f
+        }
a3470f
+
a3470f
+        return ret;
a3470f
+}
a3470f
+
a3470f
+
a3470f
+
a3470f
 int
a3470f
 dht_rmdir_unlock (call_frame_t *frame, xlator_t *this);
a3470f
 
a3470f
diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h
a3470f
index e2afd6c..47a2e23 100644
a3470f
--- a/xlators/cluster/dht/src/dht-common.h
a3470f
+++ b/xlators/cluster/dht/src/dht-common.h
a3470f
@@ -20,6 +20,7 @@
a3470f
 #include "refcount.h"
a3470f
 #include "timer.h"
a3470f
 #include "protocol-common.h"
a3470f
+#include "glusterfs-acl.h"
a3470f
 
a3470f
 #ifndef _DHT_H
a3470f
 #define _DHT_H
a3470f
@@ -146,6 +147,7 @@ struct dht_rebalance_ {
a3470f
         dht_defrag_cbk_fn_t  target_op_fn;
a3470f
         dict_t              *xdata;
a3470f
         dict_t              *xattr;
a3470f
+        dict_t              *dict;
a3470f
         int32_t              set;
a3470f
         struct gf_flock      flock;
a3470f
         int                  lock_cmd;
a3470f
@@ -1416,4 +1418,12 @@ dht_file_removexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
a3470f
 int
a3470f
 dht_file_setxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
a3470f
                        int op_ret, int op_errno, dict_t *xdata);
a3470f
+
a3470f
+/* Abstract out the DHT-IATT-IN-DICT */
a3470f
+
a3470f
+
a3470f
+int dht_request_iatt_in_xdata (xlator_t *this, dict_t *xattr_req);
a3470f
+
a3470f
+int dht_read_iatt_from_xdata (xlator_t *this, dict_t *xdata,
a3470f
+                              struct iatt *stbuf);
a3470f
 #endif/* _DHT_H */
a3470f
diff --git a/xlators/cluster/dht/src/dht-helper.c b/xlators/cluster/dht/src/dht-helper.c
a3470f
index cca2bfe..e56a085 100644
a3470f
--- a/xlators/cluster/dht/src/dht-helper.c
a3470f
+++ b/xlators/cluster/dht/src/dht-helper.c
a3470f
@@ -797,6 +797,9 @@ dht_local_wipe (xlator_t *this, dht_local_t *local)
a3470f
         if (local->rebalance.xattr)
a3470f
                 dict_unref (local->rebalance.xattr);
a3470f
 
a3470f
+        if (local->rebalance.dict)
a3470f
+                dict_unref (local->rebalance.dict);
a3470f
+
a3470f
         GF_FREE (local->rebalance.vector);
a3470f
 
a3470f
         if (local->rebalance.iobref)
a3470f
diff --git a/xlators/cluster/dht/src/dht-inode-read.c b/xlators/cluster/dht/src/dht-inode-read.c
a3470f
index a9e4766..fa63fef 100644
a3470f
--- a/xlators/cluster/dht/src/dht-inode-read.c
a3470f
+++ b/xlators/cluster/dht/src/dht-inode-read.c
a3470f
@@ -24,8 +24,9 @@ int dht_lk2 (xlator_t *this, xlator_t *dst_node,
a3470f
              call_frame_t *frame, int ret);
a3470f
 int dht_fsync2 (xlator_t *this, xlator_t *dst_node,
a3470f
                 call_frame_t *frame, int ret);
a3470f
-
a3470f
-
a3470f
+int
a3470f
+dht_common_xattrop2 (xlator_t *this, xlator_t *subvol, call_frame_t *frame,
a3470f
+                     int ret);
a3470f
 
a3470f
 int
a3470f
 dht_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
a3470f
@@ -1246,13 +1247,163 @@ err:
a3470f
         return 0;
a3470f
 }
a3470f
 
a3470f
-/* Currently no translators on top of 'distribute' will be using
a3470f
- * below fops, hence not implementing 'migration' related checks
a3470f
- */
a3470f
+
a3470f
+int
a3470f
+dht_common_xattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
a3470f
+                        int32_t op_ret, int32_t op_errno, dict_t *dict,
a3470f
+                        dict_t *xdata)
a3470f
+{
a3470f
+        dht_local_t  *local          = NULL;
a3470f
+        call_frame_t *call_frame     = NULL;
a3470f
+        xlator_t     *prev           = NULL;
a3470f
+        xlator_t     *src_subvol     = NULL;
a3470f
+        xlator_t     *dst_subvol     = NULL;
a3470f
+        struct iatt   stbuf          = {0,};
a3470f
+        int           ret            = -1;
a3470f
+        inode_t      *inode          = NULL;
a3470f
+
a3470f
+        local = frame->local;
a3470f
+        call_frame = cookie;
a3470f
+        prev = call_frame->this;
a3470f
+
a3470f
+        local->op_errno = op_errno;
a3470f
+
a3470f
+        if ((op_ret == -1) && !dht_inode_missing (op_errno)) {
a3470f
+                gf_msg_debug (this->name, op_errno,
a3470f
+                              "subvolume %s returned -1.",
a3470f
+                              prev->name);
a3470f
+                goto out;
a3470f
+        }
a3470f
+
a3470f
+        if (local->call_cnt != 1)
a3470f
+                goto out;
a3470f
+
a3470f
+        ret = dht_read_iatt_from_xdata (this, xdata, &stbuf);
a3470f
+
a3470f
+        if ((!op_ret) && (ret)) {
a3470f
+                /* The fop succeeded but no mode/iatt came back in xdata,
a3470f
+                 * so the migration phase cannot be checked. This can
a3470f
+                 * cause corruption with sharding; unwind as is.
a3470f
+                 */
a3470f
+                goto out;
a3470f
+        }
a3470f
+
a3470f
+        local->op_ret = op_ret;
a3470f
+        local->rebalance.target_op_fn = dht_common_xattrop2;
a3470f
+        if (xdata)
a3470f
+                local->rebalance.xdata = dict_ref (xdata);
a3470f
+
a3470f
+        if (dict)
a3470f
+                local->rebalance.dict = dict_ref (dict);
a3470f
+
a3470f
+        /* Phase 2 of migration */
a3470f
+        if ((op_ret == -1) || IS_DHT_MIGRATION_PHASE2 (&stbuf)) {
a3470f
+                ret = dht_rebalance_complete_check (this, frame);
a3470f
+                if (!ret)
a3470f
+                        return 0;
a3470f
+        }
a3470f
+
a3470f
+        /* Check if the rebalance phase1 is true */
a3470f
+        if (IS_DHT_MIGRATION_PHASE1 (&stbuf)) {
a3470f
+
a3470f
+                inode = local->loc.inode ? local->loc.inode : local->fd->inode;
a3470f
+                dht_inode_ctx_get_mig_info (this, inode, &src_subvol,
a3470f
+                                            &dst_subvol);
a3470f
+
a3470f
+                if (dht_mig_info_is_invalid (local->cached_subvol, src_subvol,
a3470f
+                                             dst_subvol) ||
a3470f
+                      !dht_fd_open_on_dst (this, local->fd, dst_subvol)) {
a3470f
+
a3470f
+                        ret = dht_rebalance_in_progress_check (this, frame);
a3470f
+                        if (!ret)
a3470f
+                                return 0;
a3470f
+                } else {
a3470f
+                        dht_common_xattrop2 (this, dst_subvol, frame, 0);
a3470f
+                        return 0;
a3470f
+                }
a3470f
+        }
a3470f
+
a3470f
+
a3470f
+out:
a3470f
+        if (local->fop == GF_FOP_XATTROP) {
a3470f
+                DHT_STACK_UNWIND (xattrop, frame, op_ret, op_errno,
a3470f
+                                  dict, xdata);
a3470f
+        } else {
a3470f
+                DHT_STACK_UNWIND (fxattrop, frame, op_ret, op_errno,
a3470f
+                                  dict, xdata);
a3470f
+        }
a3470f
+
a3470f
+        return 0;
a3470f
+}
a3470f
+
a3470f
+
a3470f
+int
a3470f
+dht_common_xattrop2 (xlator_t *this, xlator_t *subvol, call_frame_t *frame,
a3470f
+                     int ret)
a3470f
+{
a3470f
+        dht_local_t *local    = NULL;
a3470f
+        int32_t      op_errno = EINVAL;
a3470f
+
a3470f
+        if ((frame == NULL) || (frame->local == NULL))
a3470f
+                goto out;
a3470f
+
a3470f
+        local = frame->local;
a3470f
+        op_errno = local->op_errno;
a3470f
+
a3470f
+        if (we_are_not_migrating (ret)) {
a3470f
+                /* This dht xlator is not migrating the file. Unwind and
a3470f
+                 * pass on the original mode bits so the higher DHT layer
a3470f
+                 * can handle this.
a3470f
+                 */
a3470f
+                if (local->fop == GF_FOP_XATTROP) {
a3470f
+                        DHT_STACK_UNWIND (xattrop, frame, local->op_ret,
a3470f
+                                          op_errno, local->rebalance.dict,
a3470f
+                                          local->rebalance.xdata);
a3470f
+                } else {
a3470f
+                        DHT_STACK_UNWIND (fxattrop, frame, local->op_ret,
a3470f
+                                          op_errno, local->rebalance.dict,
a3470f
+                                          local->rebalance.xdata);
a3470f
+                }
a3470f
+
a3470f
+                return 0;
a3470f
+        }
a3470f
+
a3470f
+        if (subvol == NULL)
a3470f
+                goto out;
a3470f
+
a3470f
+        local->call_cnt = 2; /* This is the second attempt */
a3470f
+
a3470f
+        if (local->fop == GF_FOP_XATTROP) {
a3470f
+                STACK_WIND (frame, dht_common_xattrop_cbk, subvol,
a3470f
+                            subvol->fops->xattrop, &local->loc,
a3470f
+                            local->rebalance.flags, local->rebalance.xattr,
a3470f
+                            local->xattr_req);
a3470f
+        } else {
a3470f
+                STACK_WIND (frame, dht_common_xattrop_cbk, subvol,
a3470f
+                            subvol->fops->fxattrop, local->fd,
a3470f
+                            local->rebalance.flags, local->rebalance.xattr,
a3470f
+                            local->xattr_req);
a3470f
+        }
a3470f
+
a3470f
+        return 0;
a3470f
+
a3470f
+out:
a3470f
+
a3470f
+        /* If local is unavailable we could be unwinding the wrong
a3470f
+         * function here */
a3470f
+
a3470f
+        if (local && (local->fop == GF_FOP_XATTROP)) {
a3470f
+                DHT_STACK_UNWIND (xattrop, frame, -1, op_errno, NULL, NULL);
a3470f
+        } else {
a3470f
+                DHT_STACK_UNWIND (fxattrop, frame, -1, op_errno, NULL, NULL);
a3470f
+        }
a3470f
+        return 0;
a3470f
+}
a3470f
+
a3470f
 
a3470f
 int
a3470f
 dht_xattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
a3470f
-                 int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata)
a3470f
+                  int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata)
a3470f
 {
a3470f
         DHT_STACK_UNWIND (xattrop, frame, op_ret, op_errno, dict, xdata);
a3470f
         return 0;
a3470f
@@ -1263,9 +1414,10 @@ int
a3470f
 dht_xattrop (call_frame_t *frame, xlator_t *this, loc_t *loc,
a3470f
              gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata)
a3470f
 {
a3470f
-        xlator_t     *subvol = NULL;
a3470f
+        xlator_t     *subvol   = NULL;
a3470f
         int           op_errno = -1;
a3470f
-        dht_local_t  *local = NULL;
a3470f
+        dht_local_t  *local    = NULL;
a3470f
+        int           ret      = -1;
a3470f
 
a3470f
         VALIDATE_OR_GOTO (frame, err);
a3470f
         VALIDATE_OR_GOTO (this, err);
a3470f
@@ -1287,11 +1439,33 @@ dht_xattrop (call_frame_t *frame, xlator_t *this, loc_t *loc,
a3470f
                 goto err;
a3470f
         }
a3470f
 
a3470f
-        local->call_cnt = 1;
a3470f
+        /* Todo : Handle dirs as well. At the moment the only xlator above dht
a3470f
+         * that uses xattrop is sharding and that is only for files */
a3470f
+
a3470f
+        if (IA_ISDIR (loc->inode->ia_type)) {
a3470f
+                STACK_WIND (frame, dht_xattrop_cbk, subvol,
a3470f
+                            subvol->fops->xattrop, loc, flags, dict, xdata);
a3470f
+
a3470f
+        } else {
a3470f
+                local->xattr_req = xdata ? dict_ref(xdata) : dict_new ();
a3470f
+                local->call_cnt = 1;
a3470f
 
a3470f
-        STACK_WIND (frame, dht_xattrop_cbk,
a3470f
-                    subvol, subvol->fops->xattrop,
a3470f
-                    loc, flags, dict, xdata);
a3470f
+                local->rebalance.xattr = dict_ref (dict);
a3470f
+                local->rebalance.flags = flags;
a3470f
+
a3470f
+                ret = dht_request_iatt_in_xdata (this, local->xattr_req);
a3470f
+
a3470f
+                if (ret) {
a3470f
+                        gf_msg_debug (this->name, 0,
a3470f
+                                      "Failed to set dictionary key %s file=%s",
a3470f
+                                      DHT_IATT_IN_XDATA_KEY, loc->path);
a3470f
+                }
a3470f
+
a3470f
+                STACK_WIND (frame, dht_common_xattrop_cbk, subvol,
a3470f
+                            subvol->fops->xattrop, loc,
a3470f
+                            local->rebalance.flags, local->rebalance.xattr,
a3470f
+                            local->xattr_req);
a3470f
+        }
a3470f
 
a3470f
         return 0;
a3470f
 
a3470f
@@ -1318,6 +1492,8 @@ dht_fxattrop (call_frame_t *frame, xlator_t *this,
a3470f
 {
a3470f
         xlator_t     *subvol = NULL;
a3470f
         int           op_errno = -1;
a3470f
+        dht_local_t  *local    = NULL;
a3470f
+        int           ret      = -1;
a3470f
 
a3470f
         VALIDATE_OR_GOTO (frame, err);
a3470f
         VALIDATE_OR_GOTO (this, err);
a3470f
@@ -1331,10 +1507,39 @@ dht_fxattrop (call_frame_t *frame, xlator_t *this,
a3470f
                 goto err;
a3470f
         }
a3470f
 
a3470f
-        STACK_WIND (frame,
a3470f
-                    dht_fxattrop_cbk,
a3470f
-                    subvol, subvol->fops->fxattrop,
a3470f
-                    fd, flags, dict, xdata);
a3470f
+        local = dht_local_init (frame, NULL, fd, GF_FOP_FXATTROP);
a3470f
+        if (!local) {
a3470f
+                op_errno = ENOMEM;
a3470f
+                goto err;
a3470f
+        }
a3470f
+
a3470f
+        /* Todo : Handle dirs as well. At the moment the only xlator above dht
a3470f
+         * that uses xattrop is sharding and that is only for files */
a3470f
+
a3470f
+        if (IA_ISDIR (fd->inode->ia_type)) {
a3470f
+                STACK_WIND (frame, dht_fxattrop_cbk, subvol,
a3470f
+                            subvol->fops->fxattrop, fd, flags, dict, xdata);
a3470f
+
a3470f
+        } else {
a3470f
+                local->xattr_req = xdata ? dict_ref(xdata) : dict_new ();
a3470f
+                local->call_cnt = 1;
a3470f
+
a3470f
+                local->rebalance.xattr = dict_ref (dict);
a3470f
+                local->rebalance.flags = flags;
a3470f
+
a3470f
+                ret = dht_request_iatt_in_xdata (this, local->xattr_req);
a3470f
+
a3470f
+                if (ret) {
a3470f
+                        gf_msg_debug (this->name, 0,
a3470f
+                                      "Failed to set dictionary key %s fd=%p",
a3470f
+                                      DHT_IATT_IN_XDATA_KEY, fd);
a3470f
+                }
a3470f
+
a3470f
+                STACK_WIND (frame, dht_common_xattrop_cbk, subvol,
a3470f
+                            subvol->fops->fxattrop, fd,
a3470f
+                            local->rebalance.flags, local->rebalance.xattr,
a3470f
+                            local->xattr_req);
a3470f
+        }
a3470f
 
a3470f
         return 0;
a3470f
 
a3470f
@@ -1345,6 +1550,9 @@ err:
a3470f
         return 0;
a3470f
 }
a3470f
 
a3470f
+/* Currently no translators on top of 'distribute' use the fops
a3470f
+ * below, hence 'migration' related checks are not implemented for them.
a3470f
+ */
a3470f
 
a3470f
 int
a3470f
 dht_inodelk_cbk (call_frame_t *frame, void *cookie,
a3470f
@@ -1406,7 +1614,6 @@ dht_finodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
a3470f
                   int32_t op_ret, int32_t op_errno, dict_t *xdata)
a3470f
 
a3470f
 {
a3470f
-
a3470f
         dht_lk_inode_unref (frame, op_ret);
a3470f
         DHT_STACK_UNWIND (finodelk, frame, op_ret, op_errno, xdata);
a3470f
         return 0;
a3470f
diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c
a3470f
index ae367d7..3343a2b 100644
a3470f
--- a/xlators/cluster/dht/src/dht-rebalance.c
a3470f
+++ b/xlators/cluster/dht/src/dht-rebalance.c
a3470f
@@ -168,7 +168,7 @@ dht_strip_out_acls (dict_t *dict)
a3470f
 {
a3470f
         if (dict) {
a3470f
                 dict_del (dict, "trusted.SGI_ACL_FILE");
a3470f
-                dict_del (dict, "POSIX_ACL_ACCESS_XATTR");
a3470f
+                dict_del (dict, POSIX_ACL_ACCESS_XATTR);
a3470f
         }
a3470f
 }
a3470f
 
a3470f
@@ -665,7 +665,7 @@ out:
a3470f
 static int
a3470f
 __dht_rebalance_create_dst_file (xlator_t *this, xlator_t *to, xlator_t *from,
a3470f
                                  loc_t *loc, struct iatt *stbuf, fd_t **dst_fd,
a3470f
-                                 dict_t *xattr, int *fop_errno)
a3470f
+                                 int *fop_errno)
a3470f
 {
a3470f
         int          ret  = -1;
a3470f
         fd_t        *fd   = NULL;
a3470f
@@ -810,28 +810,6 @@ __dht_rebalance_create_dst_file (xlator_t *this, xlator_t *to, xlator_t *from,
a3470f
                 goto out;
a3470f
         }
a3470f
 
a3470f
-        ret = syncop_fsetxattr (to, fd, xattr, 0, NULL, NULL);
a3470f
-        if (ret < 0) {
a3470f
-                *fop_errno = -ret;
a3470f
-                gf_msg (this->name, GF_LOG_WARNING, -ret,
a3470f
-                        DHT_MSG_MIGRATE_FILE_FAILED,
a3470f
-                        "%s: failed to set xattr on %s",
a3470f
-                        loc->path, to->name);
a3470f
-
a3470f
-        }
a3470f
-
a3470f
-        /* TODO: Need to add a detailed comment about why we moved away from
a3470f
-        ftruncate.
a3470f
-
a3470f
-        ret = syncop_ftruncate (to, fd, stbuf->ia_size, NULL, NULL);
a3470f
-        if (ret < 0) {
a3470f
-                *fop_errno = -ret;
a3470f
-                gf_msg (this->name, GF_LOG_ERROR, -ret,
a3470f
-                        DHT_MSG_MIGRATE_FILE_FAILED,
a3470f
-                        "ftruncate failed for %s on %s",
a3470f
-                        loc->path, to->name);
a3470f
-        */
a3470f
-
a3470f
         ret = syncop_fsetattr (to, fd, stbuf,
a3470f
                                (GF_SET_ATTR_UID | GF_SET_ATTR_GID),
a3470f
                                 NULL, NULL, NULL, NULL);
a3470f
@@ -1620,24 +1598,10 @@ dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,
a3470f
         }
a3470f
 
a3470f
 
a3470f
-        /* TODO: move all xattr related operations to fd based operations */
a3470f
-        ret = syncop_listxattr (from, loc, &xattr, NULL, NULL);
a3470f
-        if (ret < 0) {
a3470f
-                *fop_errno = -ret;
a3470f
-                ret = -1;
a3470f
-                gf_msg (this->name, GF_LOG_WARNING, *fop_errno,
a3470f
-                        DHT_MSG_MIGRATE_FILE_FAILED,
a3470f
-                        "Migrate file failed:"
a3470f
-                        "%s: failed to get xattr from %s",
a3470f
-                        loc->path, from->name);
a3470f
-        }
a3470f
-
a3470f
-        /* Copying posix acls to the linkto file messes up the permissions*/
a3470f
-        dht_strip_out_acls (xattr);
a3470f
 
a3470f
         /* create the destination, with required modes/xattr */
a3470f
         ret = __dht_rebalance_create_dst_file (this, to, from, loc, &stbuf,
a3470f
-                                               &dst_fd, xattr, fop_errno);
a3470f
+                                               &dst_fd, fop_errno);
a3470f
         if (ret) {
a3470f
                 gf_msg (this->name, GF_LOG_ERROR, 0, 0, "Create dst failed"
a3470f
                         " on - %s for file - %s", to->name, loc->path);
a3470f
@@ -1683,7 +1647,7 @@ dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,
a3470f
                  * as in case of failure the linkto needs to point to the source
a3470f
                  * subvol */
a3470f
                 ret = __dht_rebalance_create_dst_file (this, to, from, loc, &stbuf,
a3470f
-                                                       &dst_fd, xattr, fop_errno);
a3470f
+                                                       &dst_fd, fop_errno);
a3470f
                 if (ret) {
a3470f
                         gf_log (this->name, GF_LOG_ERROR, "Create dst failed"
a3470f
                                 " on - %s for file - %s", to->name, loc->path);
a3470f
@@ -1709,8 +1673,44 @@ dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,
a3470f
                         loc->path, from->name);
a3470f
                 goto out;
a3470f
         }
a3470f
+
a3470f
+        /* TODO: move all xattr related operations to fd based operations */
a3470f
+        ret = syncop_listxattr (from, loc, &xattr, NULL, NULL);
a3470f
+        if (ret < 0) {
a3470f
+                *fop_errno = -ret;
a3470f
+                gf_msg (this->name, GF_LOG_WARNING, *fop_errno,
a3470f
+                        DHT_MSG_MIGRATE_FILE_FAILED,
a3470f
+                        "Migrate file failed:"
a3470f
+                        "%s: failed to get xattr from %s",
a3470f
+                        loc->path, from->name);
a3470f
+                ret = -1;
a3470f
+                goto out;
a3470f
+        }
a3470f
+
a3470f
+        /* Copying POSIX ACLs to the linkto file messes up the permissions */
a3470f
+        dht_strip_out_acls (xattr);
a3470f
+
a3470f
+        /* Remove the linkto xattr as we don't want to overwrite the value
a3470f
+         * set on the dst.
a3470f
+         */
a3470f
+        dict_del (xattr, conf->link_xattr_name);
a3470f
+
a3470f
+        /* We need to error out if this fails as having the wrong shard xattrs
a3470f
+         * set on the dst could cause data corruption
a3470f
+         */
a3470f
+        ret = syncop_fsetxattr (to, dst_fd, xattr, 0, NULL, NULL);
a3470f
+        if (ret < 0) {
a3470f
+                *fop_errno = -ret;
a3470f
+                gf_msg (this->name, GF_LOG_WARNING, -ret,
a3470f
+                        DHT_MSG_MIGRATE_FILE_FAILED,
a3470f
+                        "%s: failed to set xattr on %s",
a3470f
+                        loc->path, to->name);
a3470f
+                ret = -1;
a3470f
+                goto out;
a3470f
+        }
a3470f
+
a3470f
         if (xattr_rsp) {
a3470f
-                /* we no more require this key */
a3470f
+                /* we no longer require this key */
a3470f
                 dict_del (dict, conf->link_xattr_name);
a3470f
                 dict_unref (xattr_rsp);
a3470f
         }
a3470f
@@ -2011,7 +2011,9 @@ dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,
a3470f
                 xattr = NULL;
a3470f
         }
a3470f
 
a3470f
-        ret = syncop_listxattr (from, loc, &xattr, NULL, NULL);
a3470f
+        /* Fetch only the POSIX ACL xattr this time */
a3470f
+        ret = syncop_getxattr (from, loc, &xattr, POSIX_ACL_ACCESS_XATTR,
a3470f
+                               NULL, NULL);
a3470f
         if (ret < 0) {
a3470f
                 gf_msg (this->name, GF_LOG_WARNING, -ret,
a3470f
                         DHT_MSG_MIGRATE_FILE_FAILED,
a3470f
diff --git a/xlators/cluster/dht/src/dht-selfheal.c b/xlators/cluster/dht/src/dht-selfheal.c
a3470f
index 1577d03..3b9fcf1 100644
a3470f
--- a/xlators/cluster/dht/src/dht-selfheal.c
a3470f
+++ b/xlators/cluster/dht/src/dht-selfheal.c
a3470f
@@ -14,7 +14,6 @@
a3470f
 #include "dht-common.h"
a3470f
 #include "dht-messages.h"
a3470f
 #include "dht-lock.h"
a3470f
-#include "glusterfs-acl.h"
a3470f
 
a3470f
 #define DHT_SET_LAYOUT_RANGE(layout,i,srt,chunk,path)    do {           \
a3470f
                 layout->list[i].start = srt;                            \
a3470f
diff --git a/xlators/storage/posix/src/posix-helpers.c b/xlators/storage/posix/src/posix-helpers.c
a3470f
index f8d8fed..bc97206 100644
a3470f
--- a/xlators/storage/posix/src/posix-helpers.c
a3470f
+++ b/xlators/storage/posix/src/posix-helpers.c
a3470f
@@ -150,6 +150,37 @@ out:
a3470f
         return ret;
a3470f
 }
a3470f
 
a3470f
+int32_t
a3470f
+posix_set_mode_in_dict (dict_t *in_dict, dict_t *out_dict,
a3470f
+                        struct iatt *in_stbuf)
a3470f
+{
a3470f
+        int ret             = -1;
a3470f
+        mode_t mode         = 0;
a3470f
+
a3470f
+        if ((!in_dict) || (!in_stbuf) || (!out_dict)) {
a3470f
+                goto out;
a3470f
+        }
a3470f
+
a3470f
+        /* We need this only for files */
a3470f
+        if (!(IA_ISREG (in_stbuf->ia_type))) {
a3470f
+                ret = 0;
a3470f
+                goto out;
a3470f
+        }
a3470f
+
a3470f
+        /* Nobody asked for this */
a3470f
+        if (!dict_get (in_dict, DHT_MODE_IN_XDATA_KEY)) {
a3470f
+                ret = 0;
a3470f
+                goto out;
a3470f
+        }
a3470f
+        mode = st_mode_from_ia (in_stbuf->ia_prot, in_stbuf->ia_type);
a3470f
+
a3470f
+        ret = dict_set_int32 (out_dict, DHT_MODE_IN_XDATA_KEY, mode);
a3470f
+
a3470f
+out:
a3470f
+        return ret;
a3470f
+}
a3470f
+
a3470f
+
a3470f
 static gf_boolean_t
a3470f
 posix_xattr_ignorable (char *key)
a3470f
 {
a3470f
diff --git a/xlators/storage/posix/src/posix.c b/xlators/storage/posix/src/posix.c
a3470f
index 8aeca3b..a412e6d 100644
a3470f
--- a/xlators/storage/posix/src/posix.c
a3470f
+++ b/xlators/storage/posix/src/posix.c
a3470f
@@ -6146,7 +6146,9 @@ do_xattrop (call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd,
a3470f
         if (!xdata_rsp) {
a3470f
                 op_ret = -1;
a3470f
                 op_errno = ENOMEM;
a3470f
+                goto out;
a3470f
         }
a3470f
+        posix_set_mode_in_dict (xdata, xdata_rsp, &stbuf);
a3470f
 out:
a3470f
 
a3470f
         STACK_UNWIND_STRICT (xattrop, frame, op_ret, op_errno, xattr_rsp,
a3470f
diff --git a/xlators/storage/posix/src/posix.h b/xlators/storage/posix/src/posix.h
a3470f
index ae9fb08..8e40e6f 100644
a3470f
--- a/xlators/storage/posix/src/posix.h
a3470f
+++ b/xlators/storage/posix/src/posix.h
a3470f
@@ -353,4 +353,8 @@ posix_fdget_objectsignature (int, dict_t *);
a3470f
 
a3470f
 gf_boolean_t
a3470f
 posix_is_bulk_removexattr (char *name, dict_t *dict);
a3470f
+
a3470f
+int32_t
a3470f
+posix_set_mode_in_dict (dict_t *in_dict, dict_t *out_dict,
a3470f
+                        struct iatt *in_stbuf);
a3470f
 #endif /* _POSIX_H */
a3470f
-- 
a3470f
1.8.3.1
a3470f