From 3095da8cb7b78a755700b6edfaa9fab20d98df2b Mon Sep 17 00:00:00 2001
From: Susant Palai <spalai@redhat.com>
Date: Mon, 24 Aug 2015 03:04:41 -0400
Subject: [PATCH 293/304] cluster/dht: avoid mknod on decommissioned brick
BUG: 1225452
Change-Id: I838bce1a28d53d437def149d85d6e1770a292f19
Signed-off-by: Susant Palai <spalai@redhat.com>
Reviewed-on: http://review.gluster.org/11998
Tested-by: NetBSD Build System <jenkins@build.gluster.org>
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Reviewed-by: Raghavendra G <rgowdapp@redhat.com>
Signed-off-by: Susant Palai <spalai@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/56186
Reviewed-by: Raghavendra Gowdappa <rgowdapp@redhat.com>
Tested-by: Raghavendra Gowdappa <rgowdapp@redhat.com>
---
xlators/cluster/dht/src/dht-common.c | 366 ++++++++++++++++++++++++++++++----
xlators/cluster/dht/src/dht-common.h | 5 +
2 files changed, 335 insertions(+), 36 deletions(-)
diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c
index ebc183d..5c1a693 100644
--- a/xlators/cluster/dht/src/dht-common.c
+++ b/xlators/cluster/dht/src/dht-common.c
@@ -4995,8 +4995,22 @@ out:
* See dht_iatt_merge for reference.
*/
DHT_STRIP_PHASE1_FLAGS (stbuf);
- DHT_STACK_UNWIND (mknod, frame, op_ret, op_errno, inode, stbuf,
- preparent, postparent, xdata);
+
+ if (local && local->lock.locks) {
+ /* store op_errno for failure case*/
+ local->op_errno = op_errno;
+ local->refresh_layout_unlock (frame, this, op_ret);
+
+ if (op_ret == 0) {
+ DHT_STACK_UNWIND (mknod, frame, op_ret, op_errno,
+ inode, stbuf, preparent, postparent,
+ xdata);
+ }
+ } else {
+ DHT_STACK_UNWIND (mknod, frame, op_ret, op_errno, inode,
+ stbuf, preparent, postparent, xdata);
+ }
+
return 0;
}
@@ -5029,24 +5043,269 @@ dht_mknod_linkfile_create_cbk (call_frame_t *frame, void *cookie,
return 0;
err:
- DHT_STACK_UNWIND (mknod, frame, -1, op_errno, NULL, NULL, NULL, NULL,
- NULL);
+ if (local->lock.locks)
+ local->refresh_layout_unlock (frame, this, -1);
+
return 0;
}
int
+dht_mknod_wind_to_avail_subvol (call_frame_t *frame, xlator_t *this,
+ xlator_t *subvol, loc_t *loc, dev_t rdev,
+ mode_t mode, mode_t umask, dict_t *params)
+{
+ dht_local_t *local = NULL;
+ xlator_t *avail_subvol = NULL;
+
+ local = frame->local;
+
+ if (!dht_is_subvol_filled (this, subvol)) {
+ gf_msg_debug (this->name, 0,
+ "creating %s on %s", loc->path,
+ subvol->name);
+
+ STACK_WIND_COOKIE (frame, dht_newfile_cbk, (void *)subvol,
+ subvol, subvol->fops->mknod, loc, mode,
+ rdev, umask, params);
+ } else {
+ avail_subvol = dht_free_disk_available_subvol (this, subvol, local);
+
+ if (avail_subvol != subvol) {
+ local->params = dict_ref (params);
+ local->rdev = rdev;
+ local->mode = mode;
+ local->umask = umask;
+ local->cached_subvol = avail_subvol;
+ local->hashed_subvol = subvol;
+
+ gf_msg_debug (this->name, 0,
+ "creating %s on %s (link at %s)", loc->path,
+ avail_subvol->name, subvol->name);
+
+ dht_linkfile_create (frame,
+ dht_mknod_linkfile_create_cbk,
+ this, avail_subvol, subvol, loc);
+
+ goto out;
+ }
+
+ gf_msg_debug (this->name, 0,
+ "creating %s on %s", loc->path, subvol->name);
+
+ STACK_WIND_COOKIE (frame, dht_newfile_cbk,
+ (void *)subvol, subvol,
+ subvol->fops->mknod, loc, mode,
+ rdev, umask, params);
+
+ }
+out:
+ return 0;
+}
+
+
+int32_t
+dht_mknod_do (call_frame_t *frame)
+{
+ dht_local_t *local = NULL;
+ dht_layout_t *refreshed = NULL;
+ xlator_t *subvol = NULL;
+ xlator_t *this = NULL;
+ dht_conf_t *conf = NULL;
+ dht_methods_t *methods = NULL;
+
+ local = frame->local;
+
+ this = THIS;
+
+ conf = this->private;
+
+ GF_VALIDATE_OR_GOTO (this->name, conf, err);
+
+ methods = conf->methods;
+
+ GF_VALIDATE_OR_GOTO (this->name, conf->methods, err);
+
+ /* We don't need parent_loc anymore */
+ loc_wipe (&local->loc);
+
+ loc_copy (&local->loc, &local->loc2);
+
+ loc_wipe (&local->loc2);
+
+ refreshed = local->selfheal.refreshed_layout;
+
+ subvol = methods->layout_search (this, refreshed, local->loc.name);
+
+ if (!subvol) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ DHT_MSG_HASHED_SUBVOL_GET_FAILED, "no subvolume in "
+ "layout for path=%s", local->loc.path);
+ local->op_errno = ENOENT;
+ goto err;
+ }
+
+ dht_mknod_wind_to_avail_subvol (frame, this, subvol, &local->loc,
+ local->rdev, local->mode,
+ local->umask, local->params);
+ return 0;
+err:
+ local->refresh_layout_unlock (frame, this, -1);
+
+ return 0;
+}
+
+
+int32_t
+dht_mknod_unlock_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ DHT_STACK_DESTROY (frame);
+ return 0;
+}
+
+int32_t
+dht_mknod_finish (call_frame_t *frame, xlator_t *this, int op_ret)
+{
+ dht_local_t *local = NULL, *lock_local = NULL;
+ call_frame_t *lock_frame = NULL;
+ int lock_count = 0;
+
+ local = frame->local;
+ lock_count = dht_lock_count (local->lock.locks, local->lock.lk_count);
+ if (lock_count == 0)
+ goto done;
+
+ lock_frame = copy_frame (frame);
+ if (lock_frame == NULL) {
+ goto done;
+ }
+
+ lock_local = dht_local_init (lock_frame, &local->loc, NULL,
+ lock_frame->root->op);
+ if (lock_local == NULL) {
+ goto done;
+ }
+
+ lock_local->lock.locks = local->lock.locks;
+ lock_local->lock.lk_count = local->lock.lk_count;
+
+ local->lock.locks = NULL;
+ local->lock.lk_count = 0;
+
+ dht_unlock_inodelk (lock_frame, lock_local->lock.locks,
+ lock_local->lock.lk_count,
+ dht_mknod_unlock_cbk);
+ lock_frame = NULL;
+
+done:
+ if (lock_frame != NULL) {
+ DHT_STACK_DESTROY (lock_frame);
+ }
+
+ if (op_ret == 0)
+ return 0;
+
+ DHT_STACK_UNWIND (mknod, frame, op_ret, local->op_errno, NULL, NULL,
+ NULL, NULL, NULL);
+ return 0;
+}
+
+int32_t
+dht_mknod_lock_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+
+ local = frame->local;
+
+ if (!local) {
+ goto err;
+ }
+
+ if (op_ret < 0) {
+ gf_msg ("DHT", GF_LOG_ERROR, 0, DHT_MSG_INODE_LK_ERROR,
+ "mknod lock failed for file: %s", local->loc2.name);
+
+ local->op_errno = op_errno;
+
+ goto err;
+ }
+
+ local->refresh_layout_unlock = dht_mknod_finish;
+
+ local->refresh_layout_done = dht_mknod_do;
+
+ dht_refresh_layout (frame);
+
+ return 0;
+err:
+ dht_mknod_finish (frame, this, -1);
+ return 0;
+}
+
+int32_t
+dht_mknod_lock (call_frame_t *frame, xlator_t *subvol)
+{
+ dht_local_t *local = NULL;
+ int count = 1, ret = -1;
+ dht_lock_t **lk_array = NULL;
+
+ GF_VALIDATE_OR_GOTO ("dht", frame, err);
+ GF_VALIDATE_OR_GOTO (frame->this->name, frame->local, err);
+
+ local = frame->local;
+
+ lk_array = GF_CALLOC (count, sizeof (*lk_array), gf_common_mt_char);
+
+ if (lk_array == NULL)
+ goto err;
+
+ lk_array[0] = dht_lock_new (frame->this, subvol, &local->loc, F_RDLCK,
+ DHT_LAYOUT_HEAL_DOMAIN);
+
+ if (lk_array[0] == NULL)
+ goto err;
+
+ local->lock.locks = lk_array;
+ local->lock.lk_count = count;
+
+ ret = dht_blocking_inodelk (frame, lk_array, count,
+ dht_mknod_lock_cbk);
+
+ if (ret < 0) {
+ local->lock.locks = NULL;
+ local->lock.lk_count = 0;
+ goto err;
+ }
+
+ return 0;
+err:
+ if (lk_array != NULL) {
+ dht_lock_array_free (lk_array, count);
+ GF_FREE (lk_array);
+ }
+
+ return -1;
+}
+
+
+int
dht_mknod (call_frame_t *frame, xlator_t *this,
loc_t *loc, mode_t mode, dev_t rdev, mode_t umask, dict_t *params)
{
- xlator_t *subvol = NULL;
- int op_errno = -1;
- xlator_t *avail_subvol = NULL;
- dht_local_t *local = NULL;
+ xlator_t *subvol = NULL;
+ int op_errno = -1;
+ int i = 0;
+ int ret = 0;
+ dht_local_t *local = NULL;
+ dht_conf_t *conf = NULL;
VALIDATE_OR_GOTO (frame, err);
VALIDATE_OR_GOTO (this, err);
VALIDATE_OR_GOTO (loc, err);
+ conf = this->private;
+
dht_get_du_info (frame, this, loc);
local = dht_local_init (frame, loc, NULL, GF_FOP_MKNOD);
@@ -5064,42 +5323,77 @@ dht_mknod (call_frame_t *frame, xlator_t *this,
goto err;
}
- if (!dht_is_subvol_filled (this, subvol)) {
- gf_msg_trace (this->name, 0,
- "creating %s on %s", loc->path,
- subvol->name);
+ /* Post remove-brick, the client layout may not be in sync with
+ * disk layout because of lack of lookup. Hence,a mknod call
+ * may fall on the decommissioned brick. Hence, if the
+ * hashed_subvol is part of decommissioned bricks list, do a
+ * lookup on parent dir. If a fix-layout is already done by the
+ * remove-brick process, the parent directory layout will be in
+ * sync with that of the disk. If fix-layout is still ending
+ * on the parent directory, we can let the file get created on
+ * the decommissioned brick which will be eventually migrated to
+ * non-decommissioned brick based on the new layout.
+ */
- STACK_WIND_COOKIE (frame, dht_newfile_cbk, (void *)subvol,
- subvol, subvol->fops->mknod, loc, mode,
- rdev, umask, params);
- } else {
+ if (conf->decommission_subvols_cnt) {
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (conf->decommissioned_bricks[i] &&
+ conf->decommissioned_bricks[i] == subvol) {
- avail_subvol = dht_free_disk_available_subvol (this, subvol,
- local);
- if (avail_subvol != subvol) {
- /* Choose the minimum filled volume, and create the
- files there */
+ gf_msg_debug (this->name, 0, "hashed subvol:%s is "
+ "part of decommission brick list for "
+ "file: %s", subvol->name, loc->path);
+
+ /* dht_refresh_layout needs directory info in
+ * local->loc. Hence, storing the parent_loc in
+ * local->loc and storing the create context in
+ * local->loc2. We will restore this information
+ * in dht_creation do */
+
+ ret = loc_copy (&local->loc2, &local->loc);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, ENOMEM,
+ DHT_MSG_NO_MEMORY,
+ "loc_copy failed %s", loc->path);
+
+ goto err;
+ }
local->params = dict_ref (params);
- local->cached_subvol = avail_subvol;
- local->mode = mode;
local->rdev = rdev;
+ local->mode = mode;
local->umask = umask;
- dht_linkfile_create (frame,
- dht_mknod_linkfile_create_cbk,
- this, avail_subvol, subvol, loc);
- } else {
- gf_msg_trace (this->name, 0,
- "creating %s on %s", loc->path,
- subvol->name);
- STACK_WIND_COOKIE (frame, dht_newfile_cbk,
- (void *)subvol, subvol,
- subvol->fops->mknod, loc, mode,
- rdev, umask, params);
- }
+ loc_wipe (&local->loc);
+
+ ret = dht_build_parent_loc (this, &local->loc, loc,
+ &op_errno);
+
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, ENOMEM,
+ DHT_MSG_NO_MEMORY,
+ "parent loc build failed");
+ goto err;
+ }
+
+ ret = dht_mknod_lock (frame, subvol);
+
+ if (ret < 0) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ DHT_MSG_INODE_LK_ERROR,
+ "locking parent failed");
+ goto err;
+ }
+
+ goto done;
+ }
+ }
}
+ dht_mknod_wind_to_avail_subvol (frame, this, subvol, loc, rdev, mode,
+ umask, params);
+
+done:
return 0;
err:
diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h
index f583d30..1b5a084 100644
--- a/xlators/cluster/dht/src/dht-common.h
+++ b/xlators/cluster/dht/src/dht-common.h
@@ -1078,4 +1078,9 @@ dht_layout_missing_dirs (dht_layout_t *layout);
int
dht_refresh_layout (call_frame_t *frame);
+
+int
+dht_build_parent_loc (xlator_t *this, loc_t *parent, loc_t *child,
+ int32_t *op_errno);
+
#endif/* _DHT_H */
--
1.7.1