12a457
From f2433f5d83a58f5cb5c9257c9c5e10da8060da19 Mon Sep 17 00:00:00 2001
12a457
From: Raghavendra G <rgowdapp@redhat.com>
12a457
Date: Mon, 25 Apr 2016 15:09:01 +0530
12a457
Subject: [PATCH 086/104] cluster/distribute: detect stale layouts in entry fops
12a457
12a457
dht_mkdir ()
12a457
{
12a457
      first-hashed-subvol = hashed-subvol for "bname" in in-memory
12a457
                            layout of "parent";
12a457
      inodelk (SETLKW, parent, "LAYOUT_HEAL_DOMAIN", "can be any
12a457
               subvol, but we choose first-hashed-subvol randomly");
12a457
      {
12a457
begin:
12a457
            hashed-subvol = hashed-subvol for "bname" in in-memory
12a457
                            layout of "parent";
12a457
            hash-range = extract hashe-range from layout of "parent";
12a457
12a457
            ret = mkdir (parent/bname, hashed-subvol, hash-range);
12a457
            if (ret == "hash-value doesn't fall into layout stored on
12a457
                       the brick (this error is returned by posix-mkdir)")
12a457
            {
12a457
                refresh_parent_layout ();
12a457
                goto begin;
12a457
            }
12a457
12a457
      }
12a457
      inodelk (UNLCK, parent, "LAYOUT_HEAL_DOMAIN",
12a457
               "first-hashed-subvol");
12a457
12a457
      proceed with other parts of dht_mkdir;
12a457
}
12a457
12a457
posix_mkdir (parent/bname, client-hash-range)
12a457
{
12a457
12a457
       disk-hash-range = getxattr (parent, "dht-layout-key");
12a457
       if (disk-hash-range != client-hash-range) {
12a457
              fail-with-error ("hash-value doesn't fall into layout
12a457
                                stored on the brick");
12a457
              return 0;
12a457
       }
12a457
12a457
       continue-with-posix-mkdir;
12a457
}
12a457
12a457
Similar changes need to be done for dentry operations like create,
12a457
symlink, link, unlink, rmdir, rename. These will be addressed in
12a457
subsequent patches. This patch addresses only mkdir codepath.
12a457
12a457
This change breaks stripe tests, as on some striped subvols dht layout
12a457
xattrs are not set for some reason. This results in failure of
12a457
mkdir. Since striped volumes are always created with dht, some tests
12a457
associated with stripe also fail. So, I am making following tests
12a457
changes (since stripe is out of maintainance):
12a457
* modify ./tests/basic/rpc-coverage.t to not to use striped volumes
12a457
* mark all (2) tests in tests/bugs/stripe/ as bad tests
12a457
12a457
Change-Id: I7d8c26c5258be112e55c3e3ef206ccd5af778786
12a457
BUG: 1323042
12a457
Signed-off-by: Raghavendra G <rgowdapp@redhat.com>
12a457
Reviewed-on: https://code.engineering.redhat.com/gerrit/72931
12a457
Reviewed-by: Nithya Balachandran <nbalacha@redhat.com>
12a457
---
12a457
 libglusterfs/src/glusterfs.h               |    2 +
12a457
 tests/basic/rpc-coverage.t                 |    2 +-
12a457
 tests/bugs/stripe/bug-1002207.t            |    2 +
12a457
 tests/bugs/stripe/bug-1111454.t            |    2 +
12a457
 xlators/cluster/dht/src/dht-common.c       |  633 ++++++++++++++++++++++++++-
12a457
 xlators/cluster/dht/src/dht-common.h       |    8 +-
12a457
 xlators/cluster/dht/src/dht-helper.c       |    5 +
12a457
 xlators/cluster/dht/src/dht-layout.c       |   16 +
12a457
 xlators/cluster/dht/src/dht-messages.h     |   13 +-
12a457
 xlators/storage/posix/src/posix-messages.h |   12 +-
12a457
 xlators/storage/posix/src/posix.c          |  116 +++++-
12a457
 11 files changed, 770 insertions(+), 41 deletions(-)
12a457
12a457
diff --git a/libglusterfs/src/glusterfs.h b/libglusterfs/src/glusterfs.h
12a457
index d1db087..dfaa623 100644
12a457
--- a/libglusterfs/src/glusterfs.h
12a457
+++ b/libglusterfs/src/glusterfs.h
12a457
@@ -94,6 +94,8 @@
12a457
 #define GF_READDIR_SKIP_DIRS       "readdir-filter-directories"
12a457
 
12a457
 #define BD_XATTR_KEY             "user.glusterfs"
12a457
+#define GF_PREOP_PARENT_KEY      "glusterfs.preop.parent.key"
12a457
+#define GF_PREOP_CHECK_FAILED    "glusterfs.preop.check.failed"
12a457
 
12a457
 #define XATTR_IS_PATHINFO(x)  ((strncmp (x, GF_XATTR_PATHINFO_KEY,       \
12a457
                                         strlen (x)) == 0) ||             \
12a457
diff --git a/tests/basic/rpc-coverage.t b/tests/basic/rpc-coverage.t
12a457
index f8ade59..a76ba70 100644
12a457
--- a/tests/basic/rpc-coverage.t
12a457
+++ b/tests/basic/rpc-coverage.t
12a457
@@ -9,7 +9,7 @@ TEST glusterd
12a457
 TEST pidof glusterd
12a457
 TEST $CLI volume info;
12a457
 
12a457
-TEST $CLI volume create $V0 replica 2 stripe 2 $H0:$B0/${V0}{1,2,3,4,5,6,7,8};
12a457
+TEST $CLI volume create $V0 replica 2  $H0:$B0/${V0}{1,2,3,4,5,6,7,8};
12a457
 
12a457
 EXPECT "$V0" volinfo_field $V0 'Volume Name';
12a457
 EXPECT 'Created' volinfo_field $V0 'Status';
12a457
diff --git a/tests/bugs/stripe/bug-1002207.t b/tests/bugs/stripe/bug-1002207.t
12a457
index 1f8e46b..c58a6e2 100644
12a457
--- a/tests/bugs/stripe/bug-1002207.t
12a457
+++ b/tests/bugs/stripe/bug-1002207.t
12a457
@@ -51,3 +51,5 @@ TEST $CLI volume delete $V0;
12a457
 TEST ! $CLI volume info $V0;
12a457
 
12a457
 cleanup;
12a457
+#G_TESTDEF_TEST_STATUS_NETBSD7=BAD_TEST,BUG=000000
12a457
+#G_TESTDEF_TEST_STATUS_CENTOS6=BAD_TEST,BUG=000000
12a457
diff --git a/tests/bugs/stripe/bug-1111454.t b/tests/bugs/stripe/bug-1111454.t
12a457
index 05f6934..1509dd7 100644
12a457
--- a/tests/bugs/stripe/bug-1111454.t
12a457
+++ b/tests/bugs/stripe/bug-1111454.t
12a457
@@ -16,3 +16,5 @@ TEST touch $M0/dir/file
12a457
 TEST ln -s file $M0/dir/symlinkfile
12a457
 TEST ls -lR $M0
12a457
 cleanup
12a457
+#G_TESTDEF_TEST_STATUS_NETBSD7=BAD_TEST,BUG=000000
12a457
+#G_TESTDEF_TEST_STATUS_CENTOS6=BAD_TEST,BUG=000000
12a457
diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c
12a457
index ed57d5a..8e0dd28 100644
12a457
--- a/xlators/cluster/dht/src/dht-common.c
12a457
+++ b/xlators/cluster/dht/src/dht-common.c
12a457
@@ -5371,7 +5371,6 @@ out:
12a457
         return 0;
12a457
 }
12a457
 
12a457
-
12a457
 int32_t
12a457
 dht_mknod_do (call_frame_t *frame)
12a457
 {
12a457
@@ -5556,6 +5555,357 @@ err:
12a457
         return -1;
12a457
 }
12a457
 
12a457
+int
12a457
+dht_refresh_parent_layout_resume (call_frame_t *frame, xlator_t *this, int ret,
12a457
+                                  int invoke_cbk)
12a457
+{
12a457
+        dht_local_t  *local        = NULL, *parent_local = NULL;
12a457
+        call_stub_t  *stub         = NULL;
12a457
+        call_frame_t *parent_frame = NULL;
12a457
+
12a457
+        local = frame->local;
12a457
+
12a457
+        stub = local->stub;
12a457
+        local->stub = NULL;
12a457
+
12a457
+        parent_frame = stub->frame;
12a457
+        parent_local = parent_frame->local;
12a457
+
12a457
+        if (ret < 0) {
12a457
+                parent_local->op_ret = -1;
12a457
+                parent_local->op_errno = local->op_errno
12a457
+                        ? local->op_errno : EIO;
12a457
+        } else {
12a457
+                parent_local->op_ret = 0;
12a457
+        }
12a457
+
12a457
+        call_resume (stub);
12a457
+
12a457
+        DHT_STACK_DESTROY (frame);
12a457
+
12a457
+        return 0;
12a457
+}
12a457
+
12a457
+
12a457
+int
12a457
+dht_refresh_parent_layout_done (call_frame_t *frame)
12a457
+{
12a457
+        dht_local_t *local = NULL;
12a457
+        int          ret   = 0;
12a457
+
12a457
+        local = frame->local;
12a457
+
12a457
+        if (local->op_ret < 0) {
12a457
+                ret = -1;
12a457
+                goto resume;
12a457
+        }
12a457
+
12a457
+        dht_layout_set (frame->this, local->loc.inode,
12a457
+                        local->selfheal.refreshed_layout);
12a457
+
12a457
+resume:
12a457
+        dht_refresh_parent_layout_resume (frame, frame->this, ret, 1);
12a457
+        return 0;
12a457
+}
12a457
+
12a457
+
12a457
+int
12a457
+dht_handle_parent_layout_change (xlator_t *this, call_stub_t *stub)
12a457
+{
12a457
+        call_frame_t *refresh_frame = NULL, *frame = NULL;
12a457
+        dht_local_t  *refresh_local = NULL, *local = NULL;
12a457
+
12a457
+        frame = stub->frame;
12a457
+        local = frame->local;
12a457
+
12a457
+        refresh_frame = copy_frame (frame);
12a457
+        refresh_local = dht_local_init (refresh_frame, NULL, NULL,
12a457
+                                        stub->fop);
12a457
+
12a457
+        refresh_local->loc.inode = inode_ref (local->loc.parent);
12a457
+        gf_uuid_copy (refresh_local->loc.gfid, local->loc.parent->gfid);
12a457
+
12a457
+        refresh_local->stub = stub;
12a457
+
12a457
+        refresh_local->refresh_layout_unlock = dht_refresh_parent_layout_resume;
12a457
+        refresh_local->refresh_layout_done = dht_refresh_parent_layout_done;
12a457
+
12a457
+        dht_refresh_layout (refresh_frame);
12a457
+        return 0;
12a457
+}
12a457
+
12a457
+int32_t
12a457
+dht_unlock_parent_layout_during_entry_fop_done (call_frame_t *frame,
12a457
+                                                void *cookie,
12a457
+                                                xlator_t *this,
12a457
+                                                int32_t op_ret,
12a457
+                                                int32_t op_errno,
12a457
+                                                dict_t *xdata)
12a457
+{
12a457
+        dht_local_t *local                   = NULL;
12a457
+        char          gfid[GF_UUID_BUF_SIZE] = {0};
12a457
+
12a457
+        local = frame->local;
12a457
+        gf_uuid_unparse (local->lock.locks[0]->loc.inode->gfid, gfid);
12a457
+
12a457
+        if (op_ret < 0) {
12a457
+                gf_msg (this->name, GF_LOG_WARNING, op_errno,
12a457
+                        DHT_MSG_PARENT_LAYOUT_CHANGED,
12a457
+                        "unlock failed on gfid: %s, stale lock might be left "
12a457
+                        "in DHT_LAYOUT_HEAL_DOMAIN", gfid);
12a457
+        }
12a457
+
12a457
+        DHT_STACK_DESTROY (frame);
12a457
+        return 0;
12a457
+}
12a457
+
12a457
+int32_t
12a457
+dht_unlock_parent_layout_during_entry_fop (call_frame_t *frame)
12a457
+{
12a457
+        dht_local_t  *local                   = NULL, *lock_local = NULL;
12a457
+        call_frame_t *lock_frame              = NULL;
12a457
+        char          pgfid[GF_UUID_BUF_SIZE] = {0};
12a457
+
12a457
+        local = frame->local;
12a457
+
12a457
+        gf_uuid_unparse (local->loc.parent->gfid, pgfid);
12a457
+
12a457
+        lock_frame = copy_frame (frame);
12a457
+        if (lock_frame == NULL) {
12a457
+                gf_msg (frame->this->name, GF_LOG_WARNING, ENOMEM,
12a457
+                        DHT_MSG_PARENT_LAYOUT_CHANGED,
12a457
+                        "mkdir (%s/%s) (path: %s): "
12a457
+                        "copy frame failed", pgfid, local->loc.name,
12a457
+                        local->loc.path);
12a457
+                goto done;
12a457
+        }
12a457
+
12a457
+        lock_local = mem_get0 (THIS->local_pool);
12a457
+        if (lock_local == NULL) {
12a457
+                gf_msg (frame->this->name, GF_LOG_WARNING, ENOMEM,
12a457
+                        DHT_MSG_PARENT_LAYOUT_CHANGED,
12a457
+                        "mkdir (%s/%s) (path: %s): "
12a457
+                        "local creation failed", pgfid, local->loc.name,
12a457
+                        local->loc.path);
12a457
+                goto done;
12a457
+        }
12a457
+
12a457
+        lock_frame->local = lock_local;
12a457
+
12a457
+        lock_local->lock.locks = local->lock.locks;
12a457
+        lock_local->lock.lk_count = local->lock.lk_count;
12a457
+
12a457
+        local->lock.locks = NULL;
12a457
+        local->lock.lk_count = 0;
12a457
+
12a457
+        dht_unlock_inodelk (lock_frame, lock_local->lock.locks,
12a457
+                            lock_local->lock.lk_count,
12a457
+                            dht_unlock_parent_layout_during_entry_fop_done);
12a457
+
12a457
+done:
12a457
+        return 0;
12a457
+}
12a457
+
12a457
+int32_t
12a457
+dht_guard_parent_layout_during_entry_fop_cbk (call_frame_t *frame, void *cookie,
12a457
+                                              xlator_t *this, int32_t op_ret,
12a457
+                                              int32_t op_errno, dict_t *xdata)
12a457
+{
12a457
+        dht_local_t *local = NULL;
12a457
+        call_stub_t *stub  = NULL;
12a457
+
12a457
+        local = frame->local;
12a457
+        stub = local->stub;
12a457
+        local->stub = NULL;
12a457
+
12a457
+        if (op_ret < 0) {
12a457
+                local->op_ret = -1;
12a457
+                local->op_errno = op_errno;
12a457
+        } else {
12a457
+                local->op_ret = 0;
12a457
+        }
12a457
+
12a457
+        call_resume (stub);
12a457
+
12a457
+        return 0;
12a457
+}
12a457
+
12a457
+int32_t
12a457
+dht_guard_parent_layout_during_entry_fop (xlator_t *subvol, call_stub_t *stub)
12a457
+{
12a457
+        dht_local_t   *local                  = NULL;
12a457
+        int            count                  = 1,    ret = -1;
12a457
+        dht_lock_t   **lk_array               = NULL;
12a457
+        loc_t         *loc                    = NULL;
12a457
+        xlator_t      *hashed_subvol          = NULL, *this = NULL;;
12a457
+        call_frame_t  *frame                  = NULL;
12a457
+        char          pgfid[GF_UUID_BUF_SIZE] = {0};
12a457
+        loc_t          parent                 = {0, };
12a457
+        int32_t       *parent_disk_layout     = NULL;
12a457
+        dht_layout_t  *parent_layout          = NULL;
12a457
+        dht_conf_t    *conf                   = NULL;
12a457
+
12a457
+        GF_VALIDATE_OR_GOTO ("dht", stub, err);
12a457
+
12a457
+        frame = stub->frame;
12a457
+        this = frame->this;
12a457
+
12a457
+        conf = this->private;
12a457
+
12a457
+        local = frame->local;
12a457
+
12a457
+        local->stub = stub;
12a457
+
12a457
+        /* TODO: recheck whether we should lock on src or dst if we do similar
12a457
+         * stale layout checks for rename.
12a457
+         */
12a457
+        loc = &stub->args.loc;
12a457
+
12a457
+        gf_uuid_unparse (loc->parent->gfid, pgfid);
12a457
+
12a457
+        if (local->params == NULL) {
12a457
+                local->params = dict_new ();
12a457
+                if (local->params == NULL) {
12a457
+                        local->op_errno = ENOMEM;
12a457
+                        gf_msg (this->name, GF_LOG_WARNING, local->op_errno,
12a457
+                                DHT_MSG_PARENT_LAYOUT_CHANGED,
12a457
+                                "%s (%s/%s) (path: %s): "
12a457
+                                "dict allocation failed",
12a457
+                                gf_fop_list[stub->fop],
12a457
+                                pgfid, loc->name, loc->path);
12a457
+                        goto err;
12a457
+                }
12a457
+        }
12a457
+
12a457
+        hashed_subvol = dht_subvol_get_hashed (this, loc);
12a457
+        if (hashed_subvol == NULL) {
12a457
+                local->op_errno = EINVAL;
12a457
+
12a457
+                gf_msg (this->name, GF_LOG_WARNING, local->op_errno,
12a457
+                        DHT_MSG_PARENT_LAYOUT_CHANGED,
12a457
+                        "%s (%s/%s) (path: %s): "
12a457
+                        "hashed subvolume not found", gf_fop_list[stub->fop],
12a457
+                        pgfid, loc->name, loc->path);
12a457
+                goto err;
12a457
+        }
12a457
+
12a457
+        parent_layout = dht_layout_get (this, loc->parent);
12a457
+
12a457
+        ret = dht_disk_layout_extract_for_subvol (this, parent_layout,
12a457
+                                                  hashed_subvol,
12a457
+                                                  &parent_disk_layout);
12a457
+        if (ret == -1) {
12a457
+                local->op_errno = EINVAL;
12a457
+                gf_msg (this->name, GF_LOG_WARNING, local->op_errno,
12a457
+                        DHT_MSG_PARENT_LAYOUT_CHANGED,
12a457
+                        "%s (%s/%s) (path: %s): "
12a457
+                        "extracting in-memory layout of parent failed. ",
12a457
+                        gf_fop_list[stub->fop], pgfid, loc->name, loc->path);
12a457
+                goto err;
12a457
+        }
12a457
+
12a457
+        memcpy ((void *)local->parent_disk_layout, (void *)parent_disk_layout,
12a457
+                sizeof (local->parent_disk_layout));
12a457
+
12a457
+        dht_layout_unref (this, parent_layout);
12a457
+        parent_layout = NULL;
12a457
+
12a457
+        ret = dict_set_str (local->params, GF_PREOP_PARENT_KEY,
12a457
+                            conf->xattr_name);
12a457
+        if (ret < 0) {
12a457
+                local->op_errno = -ret;
12a457
+                gf_msg (this->name, GF_LOG_WARNING, local->op_errno,
12a457
+                        DHT_MSG_PARENT_LAYOUT_CHANGED,
12a457
+                        "%s (%s/%s) (path: %s): "
12a457
+                        "setting %s key in params dictionary failed. ",
12a457
+                        gf_fop_list[stub->fop], pgfid, loc->name, loc->path,
12a457
+                        GF_PREOP_PARENT_KEY);
12a457
+                goto err;
12a457
+        }
12a457
+
12a457
+        ret = dict_set_bin (local->params, conf->xattr_name, parent_disk_layout,
12a457
+                            4 * 4);
12a457
+        if (ret < 0) {
12a457
+                local->op_errno = -ret;
12a457
+                gf_msg (this->name, GF_LOG_WARNING, local->op_errno,
12a457
+                        DHT_MSG_PARENT_LAYOUT_CHANGED,
12a457
+                        "%s (%s/%s) (path: %s): "
12a457
+                        "setting parent-layout in params dictionary failed. ",
12a457
+                        gf_fop_list[stub->fop], pgfid, loc->name, loc->path);
12a457
+                goto err;
12a457
+        }
12a457
+
12a457
+        parent_disk_layout = NULL;
12a457
+
12a457
+        parent.inode = inode_ref (loc->parent);
12a457
+        gf_uuid_copy (parent.gfid, loc->parent->gfid);
12a457
+
12a457
+        lk_array = GF_CALLOC (count, sizeof (*lk_array), gf_common_mt_char);
12a457
+
12a457
+        if (lk_array == NULL) {
12a457
+                local->op_errno = ENOMEM;
12a457
+
12a457
+                gf_msg (this->name, GF_LOG_WARNING, local->op_errno,
12a457
+                        DHT_MSG_PARENT_LAYOUT_CHANGED,
12a457
+                        "%s (%s/%s) (path: %s): "
12a457
+                        "calloc failure",
12a457
+                        gf_fop_list[stub->fop], pgfid, loc->name, loc->path);
12a457
+
12a457
+                goto err;
12a457
+        }
12a457
+
12a457
+        lk_array[0] = dht_lock_new (frame->this, hashed_subvol, &parent,
12a457
+                                    F_RDLCK, DHT_LAYOUT_HEAL_DOMAIN);
12a457
+
12a457
+        if (lk_array[0] == NULL) {
12a457
+                local->op_errno = ENOMEM;
12a457
+                gf_msg (this->name, GF_LOG_WARNING, local->op_errno,
12a457
+                        DHT_MSG_PARENT_LAYOUT_CHANGED,
12a457
+                        "%s (%s/%s) (path: %s): "
12a457
+                        "lock allocation failed",
12a457
+                        gf_fop_list[stub->fop], pgfid, loc->name, loc->path);
12a457
+
12a457
+                goto err;
12a457
+        }
12a457
+
12a457
+        local->lock.locks = lk_array;
12a457
+        local->lock.lk_count = count;
12a457
+
12a457
+        ret = dht_blocking_inodelk (frame, lk_array, count, FAIL_ON_ANY_ERROR,
12a457
+                                    dht_guard_parent_layout_during_entry_fop_cbk);
12a457
+
12a457
+        if (ret < 0) {
12a457
+                local->op_errno = EIO;
12a457
+                local->lock.locks = NULL;
12a457
+                local->lock.lk_count = 0;
12a457
+                gf_msg (this->name, GF_LOG_WARNING, local->op_errno,
12a457
+                        DHT_MSG_PARENT_LAYOUT_CHANGED,
12a457
+                        "%s (%s/%s) (path: %s): "
12a457
+                        "dht_blocking_inodelk failed",
12a457
+                        gf_fop_list[stub->fop], pgfid, loc->name, loc->path);
12a457
+
12a457
+                goto err;
12a457
+        }
12a457
+
12a457
+        loc_wipe (&parent);
12a457
+
12a457
+        return 0;
12a457
+err:
12a457
+        if (lk_array != NULL) {
12a457
+                dht_lock_array_free (lk_array, count);
12a457
+                GF_FREE (lk_array);
12a457
+        }
12a457
+
12a457
+        loc_wipe (&parent);
12a457
+
12a457
+        if (parent_disk_layout != NULL)
12a457
+                GF_FREE (parent_disk_layout);
12a457
+
12a457
+        if (parent_layout != NULL)
12a457
+                dht_layout_unref (this, parent_layout);
12a457
+
12a457
+        return -1;
12a457
+}
12a457
 
12a457
 int
12a457
 dht_mknod (call_frame_t *frame, xlator_t *this,
12a457
@@ -6685,15 +7035,154 @@ dht_mkdir_hashed_cbk (call_frame_t *frame, void *cookie,
12a457
                       xlator_t *this, int op_ret, int op_errno,
12a457
                       inode_t *inode, struct iatt *stbuf,
12a457
                       struct iatt *preparent, struct iatt *postparent,
12a457
+                      dict_t *xdata);
12a457
+
12a457
+int
12a457
+dht_mkdir_helper (call_frame_t *frame, xlator_t *this,
12a457
+                  loc_t *loc, mode_t mode, mode_t umask, dict_t *params)
12a457
+{
12a457
+        dht_local_t  *local                   = NULL;
12a457
+        dht_conf_t   *conf                    = NULL;
12a457
+        int           op_errno                = -1, ret = -1;
12a457
+        xlator_t     *hashed_subvol           = NULL;
12a457
+        int32_t      *parent_disk_layout      = NULL;
12a457
+        dht_layout_t *parent_layout           = NULL;
12a457
+        char          pgfid[GF_UUID_BUF_SIZE] = {0};
12a457
+
12a457
+        VALIDATE_OR_GOTO (frame, err);
12a457
+        VALIDATE_OR_GOTO (this, err);
12a457
+        VALIDATE_OR_GOTO (loc, err);
12a457
+        VALIDATE_OR_GOTO (loc->inode, err);
12a457
+        VALIDATE_OR_GOTO (loc->path, err);
12a457
+        VALIDATE_OR_GOTO (this->private, err);
12a457
+
12a457
+        gf_uuid_unparse (loc->parent->gfid, pgfid);
12a457
+
12a457
+        conf = this->private;
12a457
+        local = frame->local;
12a457
+
12a457
+        if (local->op_ret == -1) {
12a457
+                gf_msg (this->name, GF_LOG_WARNING, local->op_errno,
12a457
+                        DHT_MSG_PARENT_LAYOUT_CHANGED,
12a457
+                        "mkdir (%s/%s) (path: %s): refreshing parent layout "
12a457
+                        "failed.", pgfid, loc->name,
12a457
+                        loc->path);
12a457
+
12a457
+                op_errno = local->op_errno;
12a457
+                goto err;
12a457
+        }
12a457
+
12a457
+        local->op_ret = -1;
12a457
+
12a457
+        hashed_subvol = dht_subvol_get_hashed (this, loc);
12a457
+        if (hashed_subvol == NULL) {
12a457
+                gf_msg_debug (this->name, 0,
12a457
+                              "mkdir (%s/%s) (path: %s): hashed subvol not "
12a457
+                              "found", pgfid, loc->name, loc->path);
12a457
+                op_errno = ENOENT;
12a457
+                goto err;
12a457
+        }
12a457
+
12a457
+        local->hashed_subvol = hashed_subvol;
12a457
+
12a457
+        parent_layout = dht_layout_get (this, loc->parent);
12a457
+
12a457
+        ret = dht_disk_layout_extract_for_subvol (this, parent_layout,
12a457
+                                                  hashed_subvol,
12a457
+                                                  &parent_disk_layout);
12a457
+        if (ret == -1) {
12a457
+                gf_msg (this->name, GF_LOG_WARNING, EIO,
12a457
+                        DHT_MSG_PARENT_LAYOUT_CHANGED,
12a457
+                        "mkdir (%s/%s) (path: %s): "
12a457
+                        "extracting in-memory layout of parent failed. ",
12a457
+                        pgfid, loc->name, loc->path);
12a457
+                goto err;
12a457
+        }
12a457
+
12a457
+        if (memcmp (local->parent_disk_layout, parent_disk_layout,
12a457
+                    sizeof (local->parent_disk_layout)) == 0) {
12a457
+                gf_msg (this->name, GF_LOG_WARNING, EIO,
12a457
+                        DHT_MSG_PARENT_LAYOUT_CHANGED,
12a457
+                        "mkdir (%s/%s) (path: %s): loop detected. "
12a457
+                        "parent layout didn't change even though "
12a457
+                        "previous attempt of mkdir failed because of "
12a457
+                        "in-memory layout not matching with that on disk.",
12a457
+                        pgfid, loc->name, loc->path);
12a457
+                op_errno = EIO;
12a457
+                goto err;
12a457
+        }
12a457
+
12a457
+        memcpy ((void *)local->parent_disk_layout, (void *)parent_disk_layout,
12a457
+                sizeof (local->parent_disk_layout));
12a457
+
12a457
+        dht_layout_unref (this, parent_layout);
12a457
+        parent_layout = NULL;
12a457
+
12a457
+        ret = dict_set_str (params, GF_PREOP_PARENT_KEY, conf->xattr_name);
12a457
+        if (ret < 0) {
12a457
+                local->op_errno = -ret;
12a457
+                gf_msg (this->name, GF_LOG_WARNING, local->op_errno,
12a457
+                        DHT_MSG_PARENT_LAYOUT_CHANGED,
12a457
+                        "mkdir (%s/%s) (path: %s): "
12a457
+                        "setting %s key in params dictionary failed. ",
12a457
+                        pgfid, loc->name, loc->path, GF_PREOP_PARENT_KEY);
12a457
+                goto err;
12a457
+        }
12a457
+
12a457
+        ret = dict_set_bin (params, conf->xattr_name, parent_disk_layout,
12a457
+                            4 * 4);
12a457
+        if (ret < 0) {
12a457
+                local->op_errno = -ret;
12a457
+                gf_msg (this->name, GF_LOG_WARNING, local->op_errno,
12a457
+                        DHT_MSG_PARENT_LAYOUT_CHANGED,
12a457
+                        "setting parent-layout in params dictionary failed. "
12a457
+                        "mkdir (%s/%s) (path: %s)", pgfid, loc->name,
12a457
+                        loc->path);
12a457
+                goto err;
12a457
+        }
12a457
+
12a457
+        parent_disk_layout = NULL;
12a457
+
12a457
+        STACK_WIND (frame, dht_mkdir_hashed_cbk,
12a457
+                    hashed_subvol,
12a457
+                    hashed_subvol->fops->mkdir,
12a457
+                    loc, mode, umask, params);
12a457
+
12a457
+        return 0;
12a457
+
12a457
+err:
12a457
+        dht_unlock_parent_layout_during_entry_fop (frame);
12a457
+
12a457
+        op_errno = local ? local->op_errno : op_errno;
12a457
+        DHT_STACK_UNWIND (mkdir, frame, -1, op_errno, NULL, NULL, NULL,
12a457
+                          NULL, NULL);
12a457
+
12a457
+        if (parent_disk_layout != NULL)
12a457
+                GF_FREE (parent_disk_layout);
12a457
+
12a457
+        if (parent_layout != NULL)
12a457
+                dht_layout_unref (this, parent_layout);
12a457
+
12a457
+        return 0;
12a457
+}
12a457
+
12a457
+int
12a457
+dht_mkdir_hashed_cbk (call_frame_t *frame, void *cookie,
12a457
+                      xlator_t *this, int op_ret, int op_errno,
12a457
+                      inode_t *inode, struct iatt *stbuf,
12a457
+                      struct iatt *preparent, struct iatt *postparent,
12a457
                       dict_t *xdata)
12a457
 {
12a457
-        dht_local_t  *local = NULL;
12a457
-        int           ret = -1;
12a457
-        call_frame_t *prev = NULL;
12a457
-        dht_layout_t *layout = NULL;
12a457
-        dht_conf_t   *conf = NULL;
12a457
-        int           i = 0;
12a457
-        xlator_t     *hashed_subvol = NULL;
12a457
+        dht_local_t  *local                   = NULL;
12a457
+        int           ret                     = -1;
12a457
+        call_frame_t *prev                    = NULL;
12a457
+        dht_layout_t *layout                  = NULL;
12a457
+        dht_conf_t   *conf                    = NULL;
12a457
+        int           i                       = 0;
12a457
+        xlator_t     *hashed_subvol           = NULL;
12a457
+        char          pgfid[GF_UUID_BUF_SIZE] = {0};
12a457
+        gf_boolean_t  parent_layout_changed   = _gf_false;
12a457
+        call_stub_t  *stub                    = NULL;
12a457
 
12a457
         VALIDATE_OR_GOTO (this->private, err);
12a457
 
12a457
@@ -6703,9 +7192,44 @@ dht_mkdir_hashed_cbk (call_frame_t *frame, void *cookie,
12a457
         conf = this->private;
12a457
         hashed_subvol = local->hashed_subvol;
12a457
 
12a457
+        gf_uuid_unparse (local->loc.parent->gfid, pgfid);
12a457
+
12a457
         if (gf_uuid_is_null (local->loc.gfid) && !op_ret)
12a457
                 gf_uuid_copy (local->loc.gfid, stbuf->ia_gfid);
12a457
 
12a457
+        if (op_ret == -1) {
12a457
+                local->op_errno = op_errno;
12a457
+
12a457
+                parent_layout_changed = dict_get (xdata, GF_PREOP_CHECK_FAILED)
12a457
+                        ? 1 : 0;
12a457
+                if (parent_layout_changed) {
12a457
+                        gf_msg (this->name, GF_LOG_INFO, 0,
12a457
+                                DHT_MSG_PARENT_LAYOUT_CHANGED,
12a457
+                                "mkdir (%s/%s) (path: %s): parent layout "
12a457
+                                "changed. Attempting a refresh and then a "
12a457
+                                "retry", pgfid, local->loc.name,
12a457
+                                local->loc.path);
12a457
+
12a457
+                        stub = fop_mkdir_stub (frame, dht_mkdir_helper,
12a457
+                                               &local->loc, local->mode,
12a457
+                                               local->umask, local->params);
12a457
+                        if (stub == NULL) {
12a457
+                                goto err;
12a457
+                        }
12a457
+
12a457
+                        dht_handle_parent_layout_change (this, stub);
12a457
+                        stub = NULL;
12a457
+
12a457
+                        return 0;
12a457
+                }
12a457
+
12a457
+                goto err;
12a457
+        }
12a457
+
12a457
+        dht_unlock_parent_layout_during_entry_fop (frame);
12a457
+        dict_del (local->params, GF_PREOP_PARENT_KEY);
12a457
+        dict_del (local->params, conf->xattr_name);
12a457
+
12a457
         if (dht_is_subvol_filled (this, hashed_subvol))
12a457
                 ret = dht_layout_merge (this, layout, prev->this,
12a457
                                         -1, ENOSPC, NULL);
12a457
@@ -6721,10 +7245,6 @@ dht_mkdir_hashed_cbk (call_frame_t *frame, void *cookie,
12a457
                         "%s: failed to merge layouts for subvol %s",
12a457
                         local->loc.path, prev->this->name);
12a457
 
12a457
-        if (op_ret == -1) {
12a457
-                local->op_errno = op_errno;
12a457
-                goto err;
12a457
-        }
12a457
         local->op_ret = 0;
12a457
 
12a457
         dht_iatt_merge (this, &local->stbuf, stbuf, prev->this);
12a457
@@ -6739,6 +7259,7 @@ dht_mkdir_hashed_cbk (call_frame_t *frame, void *cookie,
12a457
                 dht_selfheal_directory (frame, dht_mkdir_selfheal_cbk,
12a457
                                         &local->loc, layout);
12a457
         }
12a457
+
12a457
         for (i = 0; i < conf->subvolume_cnt; i++) {
12a457
                 if (conf->subvolumes[i] == hashed_subvol)
12a457
                         continue;
12a457
@@ -6749,21 +7270,64 @@ dht_mkdir_hashed_cbk (call_frame_t *frame, void *cookie,
12a457
         }
12a457
         return 0;
12a457
 err:
12a457
+        if (local->op_ret != 0)
12a457
+                dht_unlock_parent_layout_during_entry_fop (frame);
12a457
+
12a457
         DHT_STACK_UNWIND (mkdir, frame, -1, op_errno, NULL, NULL, NULL,
12a457
                           NULL, NULL);
12a457
+        if (stub) {
12a457
+                call_stub_destroy (stub);
12a457
+        }
12a457
+
12a457
         return 0;
12a457
 }
12a457
 
12a457
+int
12a457
+dht_mkdir_guard_parent_layout_cbk (call_frame_t *frame, xlator_t *this,
12a457
+                                   loc_t *loc, mode_t mode, mode_t umask,
12a457
+                                   dict_t *params)
12a457
+{
12a457
+        dht_local_t *local                    = NULL;
12a457
+        char          pgfid[GF_UUID_BUF_SIZE] = {0};
12a457
+
12a457
+        local = frame->local;
12a457
+
12a457
+        gf_uuid_unparse (loc->parent->gfid, pgfid);
12a457
+
12a457
+        if (local->op_ret < 0) {
12a457
+                gf_msg (this->name, GF_LOG_WARNING, local->op_errno,
12a457
+                        DHT_MSG_PARENT_LAYOUT_CHANGED,
12a457
+                        "mkdir (%s/%s) (path: %s): "
12a457
+                        "Acquiring lock on parent to guard against "
12a457
+                        "layout-change failed.", pgfid, loc->name, loc->path);
12a457
+                goto err;
12a457
+        }
12a457
+
12a457
+        local->op_ret = -1;
12a457
+
12a457
+        STACK_WIND (frame, dht_mkdir_hashed_cbk,
12a457
+                    local->hashed_subvol,
12a457
+                    local->hashed_subvol->fops->mkdir,
12a457
+                    loc, mode, umask, params);
12a457
+
12a457
+        return 0;
12a457
+err:
12a457
+        DHT_STACK_UNWIND (mkdir, frame, -1, local->op_errno, NULL, NULL, NULL,
12a457
+                          NULL, NULL);
12a457
+
12a457
+        return 0;
12a457
+}
12a457
 
12a457
 int
12a457
 dht_mkdir (call_frame_t *frame, xlator_t *this,
12a457
            loc_t *loc, mode_t mode, mode_t umask, dict_t *params)
12a457
 {
12a457
-        dht_local_t  *local  = NULL;
12a457
-        dht_conf_t   *conf = NULL;
12a457
-        int           op_errno = -1;
12a457
-        xlator_t     *hashed_subvol = NULL;
12a457
-
12a457
+        dht_local_t  *local                   = NULL;
12a457
+        dht_conf_t   *conf                    = NULL;
12a457
+        int           op_errno                = -1, ret = -1;
12a457
+        xlator_t     *hashed_subvol           = NULL;
12a457
+        char          pgfid[GF_UUID_BUF_SIZE] = {0};
12a457
+        call_stub_t  *stub                    = NULL;
12a457
 
12a457
         VALIDATE_OR_GOTO (frame, err);
12a457
         VALIDATE_OR_GOTO (this, err);
12a457
@@ -6772,6 +7336,8 @@ dht_mkdir (call_frame_t *frame, xlator_t *this,
12a457
         VALIDATE_OR_GOTO (loc->path, err);
12a457
         VALIDATE_OR_GOTO (this->private, err);
12a457
 
12a457
+        gf_uuid_unparse (loc->parent->gfid, pgfid);
12a457
+
12a457
         conf = this->private;
12a457
 
12a457
         dht_get_du_info (frame, this, loc);
12a457
@@ -6787,14 +7353,17 @@ dht_mkdir (call_frame_t *frame, xlator_t *this,
12a457
                 gf_msg_debug (this->name, 0,
12a457
                               "hashed subvol not found for %s",
12a457
                               loc->path);
12a457
-                op_errno = EIO;
12a457
+                local->op_errno = EIO;
12a457
                 goto err;
12a457
         }
12a457
 
12a457
+
12a457
         local->hashed_subvol = hashed_subvol;
12a457
         local->mode = mode;
12a457
         local->umask = umask;
12a457
-        local->params = dict_ref (params);
12a457
+        if (params)
12a457
+                local->params = dict_ref (params);
12a457
+
12a457
         local->inode  = inode_ref (loc->inode);
12a457
 
12a457
         local->layout = dht_layout_new (this, conf->subvolume_cnt);
12a457
@@ -6813,15 +7382,31 @@ dht_mkdir (call_frame_t *frame, xlator_t *this,
12a457
         else
12a457
                 local->layout->commit_hash = DHT_LAYOUT_HASH_INVALID;
12a457
 
12a457
-        STACK_WIND (frame, dht_mkdir_hashed_cbk,
12a457
-                    hashed_subvol,
12a457
-                    hashed_subvol->fops->mkdir,
12a457
-                    loc, mode, umask, params);
12a457
+
12a457
+        stub = fop_mkdir_stub (frame, dht_mkdir_guard_parent_layout_cbk, loc,
12a457
+                               mode, umask, params);
12a457
+        if (stub == NULL) {
12a457
+                gf_msg (this->name, GF_LOG_WARNING, ENOMEM,
12a457
+                        DHT_MSG_PARENT_LAYOUT_CHANGED,
12a457
+                        "mkdir (%s/%s) (path: %s): "
12a457
+                        "creating stub failed.", pgfid, loc->name, loc->path);
12a457
+                local->op_errno = ENOMEM;
12a457
+                goto err;
12a457
+        }
12a457
+
12a457
+        ret = dht_guard_parent_layout_during_entry_fop (this, stub);
12a457
+        if (ret < 0) {
12a457
+                gf_msg (this->name, GF_LOG_WARNING, 0,
12a457
+                        DHT_MSG_PARENT_LAYOUT_CHANGED,
12a457
+                        "mkdir (%s/%s) (path: %s) cannot wind lock request to "
12a457
+                        "guard parent layout", pgfid, loc->name, loc->path);
12a457
+                goto err;
12a457
+        }
12a457
 
12a457
         return 0;
12a457
 
12a457
 err:
12a457
-        op_errno = (op_errno == -1) ? errno : op_errno;
12a457
+        op_errno = local ? local->op_errno : op_errno;
12a457
         DHT_STACK_UNWIND (mkdir, frame, -1, op_errno, NULL, NULL, NULL,
12a457
                           NULL, NULL);
12a457
 
12a457
diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h
12a457
index d06224c..431a6d5 100644
12a457
--- a/xlators/cluster/dht/src/dht-common.h
12a457
+++ b/xlators/cluster/dht/src/dht-common.h
12a457
@@ -18,6 +18,7 @@
12a457
 
12a457
 #include "dht-mem-types.h"
12a457
 #include "dht-messages.h"
12a457
+#include "call-stub.h"
12a457
 #include "libxlator.h"
12a457
 #include "syncop.h"
12a457
 #include "refcount.h"
12a457
@@ -286,6 +287,9 @@ struct dht_local {
12a457
                 int                 op_ret;
12a457
                 int                 op_errno;
12a457
         } lock;
12a457
+
12a457
+        call_stub_t *stub;
12a457
+        int32_t      parent_disk_layout[4];
12a457
 };
12a457
 typedef struct dht_local dht_local_t;
12a457
 
12a457
@@ -709,7 +713,9 @@ int     dht_disk_layout_extract (xlator_t *this, dht_layout_t *layout,
12a457
                              int       pos, int32_t **disk_layout_p);
12a457
 int dht_disk_layout_merge (xlator_t   *this, dht_layout_t *layout,
12a457
                            int         pos, void *disk_layout_raw, int disk_layout_len);
12a457
-
12a457
+int
12a457
+dht_disk_layout_extract_for_subvol (xlator_t *this, dht_layout_t *layout,
12a457
+                                    xlator_t *subvol, int32_t **disk_layout_p);
12a457
 
12a457
 int dht_frame_return (call_frame_t *frame);
12a457
 
12a457
diff --git a/xlators/cluster/dht/src/dht-helper.c b/xlators/cluster/dht/src/dht-helper.c
12a457
index 881db81..81d49db 100644
12a457
--- a/xlators/cluster/dht/src/dht-helper.c
12a457
+++ b/xlators/cluster/dht/src/dht-helper.c
12a457
@@ -626,6 +626,11 @@ dht_local_wipe (xlator_t *this, dht_local_t *local)
12a457
         if (local->rebalance.iobref)
12a457
                 iobref_unref (local->rebalance.iobref);
12a457
 
12a457
+        if (local->stub) {
12a457
+                call_stub_destroy (local->stub);
12a457
+                local->stub = NULL;
12a457
+        }
12a457
+
12a457
         mem_put (local);
12a457
 }
12a457
 
12a457
diff --git a/xlators/cluster/dht/src/dht-layout.c b/xlators/cluster/dht/src/dht-layout.c
12a457
index 4da3df2..bc5b5a9 100644
12a457
--- a/xlators/cluster/dht/src/dht-layout.c
12a457
+++ b/xlators/cluster/dht/src/dht-layout.c
12a457
@@ -285,6 +285,22 @@ out:
12a457
         return ret;
12a457
 }
12a457
 
12a457
+int
12a457
+dht_disk_layout_extract_for_subvol (xlator_t *this, dht_layout_t *layout,
12a457
+                                    xlator_t *subvol, int32_t **disk_layout_p)
12a457
+{
12a457
+        int i = 0;
12a457
+
12a457
+        for (i = 0; i < layout->cnt; i++) {
12a457
+                if (layout->list[i].xlator == subvol)
12a457
+                        break;
12a457
+        }
12a457
+
12a457
+        if (i == layout->cnt)
12a457
+                return -1;
12a457
+
12a457
+        return dht_disk_layout_extract (this, layout, i, disk_layout_p);
12a457
+}
12a457
 
12a457
 int
12a457
 dht_disk_layout_merge (xlator_t *this, dht_layout_t *layout,
12a457
diff --git a/xlators/cluster/dht/src/dht-messages.h b/xlators/cluster/dht/src/dht-messages.h
12a457
index f1fdd4b..eb0f1c8 100644
12a457
--- a/xlators/cluster/dht/src/dht-messages.h
12a457
+++ b/xlators/cluster/dht/src/dht-messages.h
12a457
@@ -45,7 +45,7 @@
12a457
  */
12a457
 
12a457
 #define GLFS_DHT_BASE                   GLFS_MSGID_COMP_DHT
12a457
-#define GLFS_DHT_NUM_MESSAGES           112
12a457
+#define GLFS_DHT_NUM_MESSAGES           114
12a457
 #define GLFS_MSGID_END          (GLFS_DHT_BASE + GLFS_DHT_NUM_MESSAGES + 1)
12a457
 
12a457
 /* Messages with message IDs */
12a457
@@ -1047,5 +1047,16 @@
12a457
 
12a457
 #define DHT_MSG_FD_CTX_SET_FAILED         (GLFS_DHT_BASE + 112)
12a457
 
12a457
+/*
12a457
+ * missing msg-ids are because of divergence of code from
12a457
+ * master. However, retaining the same msg-id (with holes in msg-id
12a457
+ * namespace) for consistency across branches.
12a457
+ * @messageid 109114
12a457
+ * @diagnosis
12a457
+ * @recommendedaction None
12a457
+ */
12a457
+#define DHT_MSG_PARENT_LAYOUT_CHANGED  (GLFS_DHT_BASE + 114)
12a457
+
12a457
+
12a457
 #define glfs_msg_end_x GLFS_MSGID_END, "Invalid: End of messages"
12a457
 #endif /* _DHT_MESSAGES_H_ */
12a457
diff --git a/xlators/storage/posix/src/posix-messages.h b/xlators/storage/posix/src/posix-messages.h
12a457
index 4efdef0..e2d4aac 100644
12a457
--- a/xlators/storage/posix/src/posix-messages.h
12a457
+++ b/xlators/storage/posix/src/posix-messages.h
12a457
@@ -45,7 +45,7 @@
12a457
  */
12a457
 
12a457
 #define POSIX_COMP_BASE         GLFS_MSGID_COMP_POSIX
12a457
-#define GLFS_NUM_MESSAGES       108
12a457
+#define GLFS_NUM_MESSAGES       109
12a457
 #define GLFS_MSGID_END          (POSIX_COMP_BASE + GLFS_NUM_MESSAGES + 1)
12a457
 /* Messaged with message IDs */
12a457
 #define glfs_msg_start_x POSIX_COMP_BASE, "Invalid: Start of messages"
12a457
@@ -918,6 +918,16 @@
12a457
  */
12a457
 
12a457
 #define P_MSG_INODE_RESOLVE_FAILED              (POSIX_COMP_BASE + 108)
12a457
+
12a457
+/*!
12a457
+ * @messageid
12a457
+ * @diagnosis
12a457
+ * @recommendedaction
12a457
+ *
12a457
+ */
12a457
+
12a457
+#define P_MSG_PREOP_CHECK_FAILED              (POSIX_COMP_BASE + 109)
12a457
+
12a457
 /*!
12a457
  * @messageid
12a457
  * @diagnosis
12a457
diff --git a/xlators/storage/posix/src/posix.c b/xlators/storage/posix/src/posix.c
12a457
index 01ba6d2..28835cb 100644
12a457
--- a/xlators/storage/posix/src/posix.c
12a457
+++ b/xlators/storage/posix/src/posix.c
12a457
@@ -1336,18 +1336,22 @@ int
12a457
 posix_mkdir (call_frame_t *frame, xlator_t *this,
12a457
              loc_t *loc, mode_t mode, mode_t umask, dict_t *xdata)
12a457
 {
12a457
-        int32_t               op_ret        = -1;
12a457
-        int32_t               op_errno      = 0;
12a457
-        char                 *real_path     = NULL, *gfid_path = NULL;
12a457
-        char                 *par_path      = NULL;
12a457
-        struct iatt           stbuf         = {0, };
12a457
-        struct posix_private *priv          = NULL;
12a457
-        gid_t                 gid           = 0;
12a457
-        struct iatt           preparent     = {0,};
12a457
-        struct iatt           postparent    = {0,};
12a457
-        gf_boolean_t          entry_created = _gf_false, gfid_set = _gf_false;
12a457
-        void                 *uuid_req      = NULL;
12a457
-        ssize_t               size          = 0;
12a457
+        int32_t               op_ret          = -1;
12a457
+        int32_t               op_errno        = 0;
12a457
+        char                 *real_path       = NULL, *gfid_path = NULL;
12a457
+        char                 *par_path        = NULL, *xattr_name = NULL;
12a457
+        struct iatt           stbuf           = {0, };
12a457
+        struct posix_private *priv            = NULL;
12a457
+        gid_t                 gid             = 0;
12a457
+        struct iatt           preparent       = {0,};
12a457
+        struct iatt           postparent      = {0,};
12a457
+        gf_boolean_t          entry_created   = _gf_false, gfid_set = _gf_false;
12a457
+        void                 *uuid_req        = NULL;
12a457
+        ssize_t               size            = 0;
12a457
+        dict_t               *xdata_rsp       = NULL;
12a457
+        void                 *disk_xattr      = NULL, *arg_xattr = NULL;
12a457
+        data_t               *arg_data        = NULL;
12a457
+        char          pgfid[GF_UUID_BUF_SIZE] = {0};
12a457
 
12a457
         DECLARE_OLD_FS_ID_VAR;
12a457
 
12a457
@@ -1377,6 +1381,11 @@ posix_mkdir (call_frame_t *frame, xlator_t *this,
12a457
                 goto out;
12a457
         }
12a457
 
12a457
+        if (loc->parent)
12a457
+                gf_uuid_unparse (loc->parent->gfid, pgfid);
12a457
+        else
12a457
+                gf_uuid_unparse (loc->pargfid, pgfid);
12a457
+
12a457
         gid = frame->root->gid;
12a457
 
12a457
         op_ret = posix_pstat (this, NULL, real_path, &stbuf);
12a457
@@ -1420,6 +1429,84 @@ posix_mkdir (call_frame_t *frame, xlator_t *this,
12a457
                 mode |= S_ISGID;
12a457
         }
12a457
 
12a457
+        op_ret = dict_get_str (xdata, GF_PREOP_PARENT_KEY, &xattr_name);
12a457
+        if (xattr_name != NULL) {
12a457
+                arg_data = dict_get (xdata, xattr_name);
12a457
+                if (arg_data) {
12a457
+                        size = sys_lgetxattr (par_path, xattr_name, NULL, 0);
12a457
+                        if (size < 0) {
12a457
+                                op_ret = -1;
12a457
+                                op_errno = errno;
12a457
+                                gf_msg (this->name, GF_LOG_ERROR, errno,
12a457
+                                        P_MSG_PREOP_CHECK_FAILED,
12a457
+                                        "mkdir (%s/%s): getxattr on key (%s)"
12a457
+                                        " path (%s) failed ", pgfid,
12a457
+                                        loc->name, xattr_name,
12a457
+                                        par_path);
12a457
+                                goto out;
12a457
+                        }
12a457
+
12a457
+                        disk_xattr = alloca (size);
12a457
+                        if (disk_xattr == NULL) {
12a457
+                                op_ret = -1;
12a457
+                                op_errno = errno;
12a457
+                                gf_msg (this->name, GF_LOG_ERROR, errno,
12a457
+                                        P_MSG_PREOP_CHECK_FAILED,
12a457
+                                        "mkdir (%s/%s): alloca failed during"
12a457
+                                        " preop of mkdir (%s)", pgfid,
12a457
+                                        loc->name, real_path);
12a457
+                                goto out;
12a457
+                        }
12a457
+
12a457
+                        size = sys_lgetxattr (par_path, xattr_name,
12a457
+                                              disk_xattr, size);
12a457
+                        if (size < 0) {
12a457
+                                op_errno = errno;
12a457
+                                gf_msg (this->name, GF_LOG_ERROR, errno,
12a457
+                                        P_MSG_PREOP_CHECK_FAILED,
12a457
+                                        "mkdir (%s/%s): getxattr on key (%s)"
12a457
+                                        " path (%s) failed (%s)", pgfid,
12a457
+                                        loc->name, xattr_name,
12a457
+                                        par_path, strerror (errno));
12a457
+                                goto out;
12a457
+                        }
12a457
+
12a457
+                        if ((arg_data->len != size)
12a457
+                            || (memcmp (arg_data->data, disk_xattr, size))) {
12a457
+                                int ret = 0;
12a457
+                                gf_msg (this->name, GF_LOG_INFO, EIO,
12a457
+                                        P_MSG_PREOP_CHECK_FAILED,
12a457
+                                        "mkdir (%s/%s): failing preop of "
12a457
+                                        "mkdir (%s) as on-disk"
12a457
+                                        " xattr value differs from argument "
12a457
+                                        "value for key %s", pgfid, loc->name,
12a457
+                                        real_path, xattr_name);
12a457
+                                op_ret = -1;
12a457
+                                op_errno = EIO;
12a457
+
12a457
+                                xdata_rsp = dict_new ();
12a457
+                                if (xdata_rsp == NULL) {
12a457
+                                        gf_msg (this->name, GF_LOG_ERROR,
12a457
+                                                ENOMEM,
12a457
+                                                P_MSG_PREOP_CHECK_FAILED,
12a457
+                                                "mkdir (%s/%s):  "
12a457
+                                                "dict allocation failed", pgfid,
12a457
+                                                loc->name);
12a457
+                                        op_errno = ENOMEM;
12a457
+                                        goto out;
12a457
+                                }
12a457
+
12a457
+                                ret = dict_set_int8 (xdata_rsp,
12a457
+                                                     GF_PREOP_CHECK_FAILED, 1);
12a457
+                                goto out;
12a457
+                        }
12a457
+
12a457
+                        dict_del (xdata, xattr_name);
12a457
+                }
12a457
+
12a457
+                dict_del (xdata, GF_PREOP_PARENT_KEY);
12a457
+        }
12a457
+
12a457
         op_ret = mkdir (real_path, mode);
12a457
         if (op_ret == -1) {
12a457
                 op_errno = errno;
12a457
@@ -1483,7 +1570,7 @@ out:
12a457
 
12a457
         STACK_UNWIND_STRICT (mkdir, frame, op_ret, op_errno,
12a457
                              (loc)?loc->inode:NULL, &stbuf, &preparent,
12a457
-                             &postparent, NULL);
12a457
+                             &postparent, xdata_rsp);
12a457
 
12a457
         if (op_ret < 0) {
12a457
                 if (entry_created)
12a457
@@ -1493,6 +1580,9 @@ out:
12a457
                         posix_gfid_unset (this, xdata);
12a457
         }
12a457
 
12a457
+        if (xdata_rsp)
12a457
+                dict_unref (xdata_rsp);
12a457
+
12a457
         return 0;
12a457
 }
12a457
 
12a457
-- 
12a457
1.7.1
12a457