From a288ff3ea3b792dcafaa4ffa247d3c6032d68f10 Mon Sep 17 00:00:00 2001
From: Sakshi <sabansal@redhat.com>
Date: Thu, 16 Jul 2015 14:31:03 +0530
Subject: [PATCH 311/320] dht : lock on subvols to prevent lookup vs rmdir race
There is a possibility that, while an rmdir has completed on
some non-hashed subvols and is proceeding to others, a lookup
selfheal recreates the same directory on those subvols
for which the rmdir had already succeeded. The fix is to take a
blocking inodelk on the subvols before starting rmdir.
Since selfheal requires a lock on all subvols, if an rmdir
is in progress, acquiring the locks will fail, and vice versa.
Change-Id: I841a44758c3b88f5e04d1cb73ad36e0cac9fdabb
BUG: 1115367
Signed-off-by: Sakshi <sabansal@redhat.com>
Reviewed-on: http://review.gluster.org/11725
Tested-by: NetBSD Build System <jenkins@build.gluster.org>
Reviewed-by: Raghavendra G <rgowdapp@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/56595
Reviewed-by: Raghavendra Gowdappa <rgowdapp@redhat.com>
Tested-by: Raghavendra Gowdappa <rgowdapp@redhat.com>
---
xlators/cluster/dht/src/dht-common.c | 180 +++++++++++++++++++++++++++----
xlators/cluster/dht/src/dht-common.h | 14 ++-
xlators/cluster/dht/src/dht-helper.c | 38 ++++++-
xlators/cluster/dht/src/dht-rename.c | 2 +-
xlators/cluster/dht/src/dht-selfheal.c | 181 +++++++++++++++++++++++---------
5 files changed, 331 insertions(+), 84 deletions(-)
diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c
index 5c1a693..f819aa6 100644
--- a/xlators/cluster/dht/src/dht-common.c
+++ b/xlators/cluster/dht/src/dht-common.c
@@ -40,6 +40,10 @@ dht_setxattr2 (xlator_t *this, xlator_t *subvol, call_frame_t *frame);
int run_defrag = 0;
int
+dht_rmdir_unlock (call_frame_t *frame, xlator_t *this);
+
+
+int
dht_aggregate_quota_xattr (dict_t *dst, char *key, data_t *value)
{
int ret = -1;
@@ -4514,7 +4518,6 @@ dht_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
* corresponding hashed subvolume will take care of the
* directory entry.
*/
-
if (readdir_optimize) {
if (prev->this == local->first_up_subvol)
goto list;
@@ -4999,7 +5002,7 @@ out:
if (local && local->lock.locks) {
/* store op_errno for failure case*/
local->op_errno = op_errno;
- local->refresh_layout_unlock (frame, this, op_ret);
+ local->refresh_layout_unlock (frame, this, op_ret, 0);
if (op_ret == 0) {
DHT_STACK_UNWIND (mknod, frame, op_ret, op_errno,
@@ -5044,7 +5047,7 @@ dht_mknod_linkfile_create_cbk (call_frame_t *frame, void *cookie,
return 0;
err:
if (local->lock.locks)
- local->refresh_layout_unlock (frame, this, -1);
+ local->refresh_layout_unlock (frame, this, -1, 0);
return 0;
}
@@ -5149,7 +5152,7 @@ dht_mknod_do (call_frame_t *frame)
local->umask, local->params);
return 0;
err:
- local->refresh_layout_unlock (frame, this, -1);
+ local->refresh_layout_unlock (frame, this, -1, 0);
return 0;
}
@@ -5164,7 +5167,7 @@ dht_mknod_unlock_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
}
int32_t
-dht_mknod_finish (call_frame_t *frame, xlator_t *this, int op_ret)
+dht_mknod_finish (call_frame_t *frame, xlator_t *this, int op_ret, int invoke_cbk)
{
dht_local_t *local = NULL, *lock_local = NULL;
call_frame_t *lock_frame = NULL;
@@ -5239,7 +5242,7 @@ dht_mknod_lock_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
return 0;
err:
- dht_mknod_finish (frame, this, -1);
+ dht_mknod_finish (frame, this, -1, 0);
return 0;
}
@@ -5270,7 +5273,7 @@ dht_mknod_lock (call_frame_t *frame, xlator_t *subvol)
local->lock.lk_count = count;
ret = dht_blocking_inodelk (frame, lk_array, count,
- dht_mknod_lock_cbk);
+ IGNORE_ENOENT_ESTALE, dht_mknod_lock_cbk);
if (ret < 0) {
local->lock.locks = NULL;
@@ -5797,7 +5800,7 @@ out:
if (local && local->lock.locks) {
/* store op_errno for failure case*/
local->op_errno = op_errno;
- local->refresh_layout_unlock (frame, this, op_ret);
+ local->refresh_layout_unlock (frame, this, op_ret, 0);
if (op_ret == 0) {
DHT_STACK_UNWIND (create, frame, op_ret, op_errno, fd,
@@ -5838,7 +5841,7 @@ dht_create_linkfile_create_cbk (call_frame_t *frame, void *cookie,
return 0;
err:
if (local->lock.locks)
- local->refresh_layout_unlock (frame, this, -1);
+ local->refresh_layout_unlock (frame, this, -1, 0);
return 0;
}
@@ -6002,7 +6005,7 @@ dht_create_do (call_frame_t *frame)
local->umask, local->fd, local->params);
return 0;
err:
- local->refresh_layout_unlock (frame, this, -1);
+ local->refresh_layout_unlock (frame, this, -1, 0);
return 0;
}
@@ -6016,7 +6019,7 @@ dht_create_unlock_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
}
int32_t
-dht_create_finish (call_frame_t *frame, xlator_t *this, int op_ret)
+dht_create_finish (call_frame_t *frame, xlator_t *this, int op_ret, int invoke_cbk)
{
dht_local_t *local = NULL, *lock_local = NULL;
call_frame_t *lock_frame = NULL;
@@ -6091,7 +6094,7 @@ dht_create_lock_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
return 0;
err:
- dht_create_finish (frame, this, -1);
+ dht_create_finish (frame, this, -1, 0);
return 0;
}
@@ -6122,7 +6125,7 @@ dht_create_lock (call_frame_t *frame, xlator_t *subvol)
local->lock.lk_count = count;
ret = dht_blocking_inodelk (frame, lk_array, count,
- dht_create_lock_cbk);
+ IGNORE_ENOENT_ESTALE, dht_create_lock_cbk);
if (ret < 0) {
local->lock.locks = NULL;
@@ -6582,6 +6585,7 @@ unlock:
this_call_cnt = dht_frame_return (frame);
if (is_last_call (this_call_cnt)) {
if (local->need_selfheal) {
+ dht_rmdir_unlock (frame, this);
local->layout =
dht_layout_get (this, local->loc.inode);
@@ -6605,6 +6609,7 @@ unlock:
1);
}
+ dht_rmdir_unlock (frame, this);
DHT_STACK_UNWIND (rmdir, frame, local->op_ret,
local->op_errno, &local->preparent,
&local->postparent, NULL);
@@ -6673,6 +6678,7 @@ unlock:
if (done) {
if (local->need_selfheal && local->fop_succeeded) {
+ dht_rmdir_unlock (frame, this);
local->layout =
dht_layout_get (this, local->loc.inode);
@@ -6707,6 +6713,7 @@ unlock:
}
+ dht_rmdir_unlock (frame, this);
DHT_STACK_UNWIND (rmdir, frame, local->op_ret,
local->op_errno, &local->preparent,
&local->postparent, NULL);
@@ -6718,11 +6725,110 @@ unlock:
int
+dht_rmdir_unlock_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ DHT_STACK_DESTROY (frame); /* op_ret/op_errno of the unlock are not examined; presumably frame is the lock_frame created in dht_rmdir_unlock() */
+ return 0; /* callback given to dht_unlock_inodelk() by dht_rmdir_unlock() */
+}
+
+/*
+ * Release the inodelks recorded in frame->local->lock.
+ *
+ * The lock array is moved to the local of a copy of the frame and
+ * dht_unlock_inodelk() is issued on that copy with dht_rmdir_unlock_cbk
+ * as its callback; the caller's local is left with no locks.  If
+ * dht_lock_count() reports 0, or the frame copy or its local cannot be
+ * created, no unlock is sent.  Always returns 0.
+ */
+int
+dht_rmdir_unlock (call_frame_t *frame, xlator_t *this)
+{
+ dht_local_t *local = NULL, *lock_local = NULL;
+ call_frame_t *lock_frame = NULL;
+ int lock_count = 0;
+
+ local = frame->local;
+ lock_count = dht_lock_count (local->lock.locks, local->lock.lk_count);
+
+ if (lock_count == 0)
+ goto done;
+
+ lock_frame = copy_frame (frame);
+ if (lock_frame == NULL)
+ goto done;
+
+ lock_local = dht_local_init (lock_frame, &local->loc, NULL,
+ lock_frame->root->op);
+ if (lock_local == NULL)
+ goto done; /* lock_frame is still set, so it is destroyed under done */
+
+ lock_local->lock.locks = local->lock.locks;
+ lock_local->lock.lk_count = local->lock.lk_count;
+
+ local->lock.locks = NULL; /* the array now belongs to lock_local only */
+ local->lock.lk_count = 0;
+ dht_unlock_inodelk (lock_frame, lock_local->lock.locks,
+ lock_local->lock.lk_count,
+ dht_rmdir_unlock_cbk);
+ lock_frame = NULL; /* skip the destroy under done; dht_rmdir_unlock_cbk destroys its frame */
+
+done:
+ if (lock_frame != NULL) {
+ DHT_STACK_DESTROY (lock_frame);
+ }
+
+ return 0;
+}
+
+
+/*
+ * Callback for the blocking inodelk requested by dht_rmdir_do().
+ *
+ * If the lock request failed (op_ret < 0), the error is stored in local
+ * (EAGAIN is reported as EBUSY), any locks are released and rmdir is
+ * unwound.  Otherwise rmdir is wound to every subvolume except
+ * local->hashed_subvol, with dht_rmdir_cbk as the callback.
+ *
+ * Always returns 0.
+ */
+int
+dht_rmdir_lock_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                    int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+        dht_local_t *local = NULL;
+        dht_conf_t  *conf  = NULL;
+        int          i     = 0;
+
+        /* Fetch local before the first possible jump to an error label:
+         * the unwind below dereferences it. */
+        local = frame->local;
+
+        VALIDATE_OR_GOTO (this->private, invalid);
+
+        conf = this->private;
+
+        if (op_ret < 0) {
+                gf_msg (this->name, GF_LOG_WARNING, op_errno,
+                        DHT_MSG_INODE_LK_ERROR,
+                        "acquiring inodelk failed for rmdir of %s",
+                        local->loc.path);
+
+                local->op_ret = -1;
+                local->op_errno = (op_errno == EAGAIN) ? EBUSY : op_errno;
+                goto err;
+        }
+
+        for (i = 0; i < conf->subvolume_cnt; i++) {
+                if (local->hashed_subvol &&
+                    (local->hashed_subvol == conf->subvolumes[i]))
+                        continue;
+
+                STACK_WIND (frame, dht_rmdir_cbk,
+                            conf->subvolumes[i],
+                            conf->subvolumes[i]->fops->rmdir,
+                            &local->loc, local->flags, NULL);
+        }
+
+        return 0;
+
+invalid:
+        /* this->private was NULL: report EINVAL instead of unwinding with
+         * whatever op_ret local happened to hold. */
+        local->op_ret = -1;
+        local->op_errno = EINVAL;
+err:
+        /* No harm in calling an extra rmdir unlock */
+        dht_rmdir_unlock (frame, this);
+        DHT_STACK_UNWIND (rmdir, frame, local->op_ret, local->op_errno,
+                          &local->preparent, &local->postparent, NULL);
+
+        return 0;
+}
+
+
+int
dht_rmdir_do (call_frame_t *frame, xlator_t *this)
{
dht_local_t *local = NULL;
dht_conf_t *conf = NULL;
- int i = 0;
+ dht_lock_t **lk_array = NULL;
+ int i = 0, ret = -1;
+ int count = 1;
xlator_t *hashed_subvol = NULL;
char gfid[GF_UUID_BUF_SIZE] ={0};
@@ -6736,7 +6842,6 @@ dht_rmdir_do (call_frame_t *frame, xlator_t *this)
local->call_cnt = conf->subvolume_cnt;
-
/* first remove from non-hashed_subvol */
hashed_subvol = dht_subvol_get_hashed (this, &local->loc);
@@ -6760,20 +6865,49 @@ dht_rmdir_do (call_frame_t *frame, xlator_t *this)
return 0;
}
- for (i = 0; i < conf->subvolume_cnt; i++) {
- if (hashed_subvol &&
- (hashed_subvol == conf->subvolumes[i]))
- continue;
+ count = conf->subvolume_cnt;
- STACK_WIND (frame, dht_rmdir_cbk,
- conf->subvolumes[i],
- conf->subvolumes[i]->fops->rmdir,
- &local->loc, local->flags, NULL);
+ lk_array = GF_CALLOC (count, sizeof (*lk_array), gf_common_mt_char);
+ if (lk_array == NULL) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ goto err;
+ }
+
+ for (i = 0; i < count; i++) {
+ lk_array[i] = dht_lock_new (frame->this,
+ conf->subvolumes[i],
+ &local->loc, F_WRLCK,
+ DHT_LAYOUT_HEAL_DOMAIN);
+ if (lk_array[i] == NULL) {
+ local->op_ret = -1;
+ local->op_errno = EINVAL;
+ goto err;
+ }
+ }
+
+ local->lock.locks = lk_array;
+ local->lock.lk_count = count;
+
+ ret = dht_blocking_inodelk (frame, lk_array, count,
+ IGNORE_ENOENT_ESTALE,
+ dht_rmdir_lock_cbk);
+ if (ret < 0) {
+ local->lock.locks = NULL;
+ local->lock.lk_count = 0;
+ local->op_ret = -1;
+ local->op_errno = errno ? errno : EINVAL;
+ goto err;
}
return 0;
err:
+ if (lk_array != NULL) {
+ dht_lock_array_free (lk_array, count);
+ GF_FREE (lk_array);
+ }
+
DHT_STACK_UNWIND (rmdir, frame, local->op_ret, local->op_errno,
&local->preparent, &local->postparent, NULL);
return 0;
diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h
index 1b5a084..7f99a06 100644
--- a/xlators/cluster/dht/src/dht-common.h
+++ b/xlators/cluster/dht/src/dht-common.h
@@ -45,7 +45,7 @@ typedef int (*dht_defrag_cbk_fn_t) (xlator_t *this, xlator_t *dst_node,
call_frame_t *frame);
typedef int (*dht_refresh_layout_unlock) (call_frame_t *frame, xlator_t *this,
- int op_ret);
+ int op_ret, int invoke_cbk);
typedef int (*dht_refresh_layout_done_handle) (call_frame_t *frame);
@@ -136,6 +136,11 @@ typedef enum {
qdstatfs_action_COMPARE,
} qdstatfs_action_t;
+typedef enum {
+ FAIL_ON_ANY_ERROR, /* any inodelk error on a subvol fails the lock request */
+ IGNORE_ENOENT_ESTALE /* dht_blocking_inodelk_cbk skips ENOENT/ESTALE from a subvol; it still fails if no lock at all was taken */
+} dht_reaction_type_t;
+
struct dht_skip_linkto_unlink {
gf_boolean_t handle_valid_link;
@@ -266,6 +271,7 @@ struct dht_local {
fop_inodelk_cbk_t inodelk_cbk;
dht_lock_t **locks;
int lk_count;
+ dht_reaction_type_t reaction;
/* whether locking failed on _any_ of the "locks" above */
int op_ret;
@@ -1047,7 +1053,8 @@ dht_fill_dict_to_avoid_unlink_of_migrating_file (dict_t *dict);
int
dht_nonblocking_inodelk (call_frame_t *frame, dht_lock_t **lk_array,
- int lk_count, fop_inodelk_cbk_t inodelk_cbk);
+ int lk_count, dht_reaction_type_t reaction,
+ fop_inodelk_cbk_t inodelk_cbk);
/* same as dht_nonblocking_inodelk, but issues sequential blocking locks on
* @lk_array directly. locks are issued on some order which remains same
@@ -1055,7 +1062,8 @@ dht_nonblocking_inodelk (call_frame_t *frame, dht_lock_t **lk_array,
*/
int
dht_blocking_inodelk (call_frame_t *frame, dht_lock_t **lk_array,
- int lk_count, fop_inodelk_cbk_t inodelk_cbk);
+ int lk_count, dht_reaction_type_t reaction,
+ fop_inodelk_cbk_t inodelk_cbk);
int32_t
dht_unlock_inodelk (call_frame_t *frame, dht_lock_t **lk_array, int lk_count,
diff --git a/xlators/cluster/dht/src/dht-helper.c b/xlators/cluster/dht/src/dht-helper.c
index 2e4a53c..1b3fbb0 100644
--- a/xlators/cluster/dht/src/dht-helper.c
+++ b/xlators/cluster/dht/src/dht-helper.c
@@ -347,6 +347,7 @@ dht_lock_new (xlator_t *this, xlator_t *xl, loc_t *loc, short type,
lock->xl = xl;
lock->type = type;
+
lock->domain = gf_strdup (domain);
if (lock->domain == NULL) {
dht_lock_free (lock);
@@ -1692,7 +1693,8 @@ out:
int
dht_nonblocking_inodelk (call_frame_t *frame, dht_lock_t **lk_array,
- int lk_count, fop_inodelk_cbk_t inodelk_cbk)
+ int lk_count, dht_reaction_type_t reaction,
+ fop_inodelk_cbk_t inodelk_cbk)
{
struct gf_flock flock = {0,};
int i = 0, ret = 0;
@@ -1715,6 +1717,7 @@ dht_nonblocking_inodelk (call_frame_t *frame, dht_lock_t **lk_array,
dht_set_lkowner (lk_array, lk_count, &lock_frame->root->lk_owner);
local = lock_frame->local;
+ local->lock.reaction = reaction;
local->main_frame = frame;
local->call_cnt = lk_count;
@@ -1745,21 +1748,42 @@ dht_blocking_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
int lk_index = 0;
+ int i = 0;
dht_local_t *local = NULL;
lk_index = (long) cookie;
local = frame->local;
-
if (op_ret == 0) {
local->lock.locks[lk_index]->locked = _gf_true;
} else {
- local->lock.op_ret = -1;
- local->lock.op_errno = op_errno;
- goto cleanup;
+ switch (op_errno) {
+ case ESTALE:
+ case ENOENT:
+ if (local->lock.reaction != IGNORE_ENOENT_ESTALE) {
+ local->lock.op_ret = -1;
+ local->lock.op_errno = op_errno;
+ goto cleanup;
+ }
+ break;
+ default:
+ local->lock.op_ret = -1;
+ local->lock.op_errno = op_errno;
+ goto cleanup;
+ }
}
if (lk_index == (local->lock.lk_count - 1)) {
+ for (i = 0; (i < local->lock.lk_count) &&
+ (!local->lock.locks[i]->locked); i++) {
+ ;
+ }
+
+ if (i == local->lock.lk_count) {
+ local->lock.op_ret = -1;
+ local->lock.op_errno = op_errno;
+ }
+
dht_inodelk_done (frame);
} else {
dht_blocking_inodelk_rec (frame, ++lk_index);
@@ -1833,7 +1857,8 @@ out:
int
dht_blocking_inodelk (call_frame_t *frame, dht_lock_t **lk_array,
- int lk_count, fop_inodelk_cbk_t inodelk_cbk)
+ int lk_count, dht_reaction_type_t reaction,
+ fop_inodelk_cbk_t inodelk_cbk)
{
int ret = -1;
call_frame_t *lock_frame = NULL;
@@ -1855,6 +1880,7 @@ dht_blocking_inodelk (call_frame_t *frame, dht_lock_t **lk_array,
dht_set_lkowner (lk_array, lk_count, &lock_frame->root->lk_owner);
local = lock_frame->local;
+ local->lock.reaction = reaction;
local->main_frame = frame;
dht_blocking_inodelk_rec (lock_frame, 0);
diff --git a/xlators/cluster/dht/src/dht-rename.c b/xlators/cluster/dht/src/dht-rename.c
index 320f875..06d7ac8 100644
--- a/xlators/cluster/dht/src/dht-rename.c
+++ b/xlators/cluster/dht/src/dht-rename.c
@@ -1307,7 +1307,7 @@ dht_rename_lock (call_frame_t *frame)
local->lock.lk_count = count;
ret = dht_nonblocking_inodelk (frame, lk_array, count,
- dht_rename_lock_cbk);
+ FAIL_ON_ANY_ERROR, dht_rename_lock_cbk);
if (ret < 0) {
local->lock.locks = NULL;
local->lock.lk_count = 0;
diff --git a/xlators/cluster/dht/src/dht-selfheal.c b/xlators/cluster/dht/src/dht-selfheal.c
index cd1d97f..46491cf 100644
--- a/xlators/cluster/dht/src/dht-selfheal.c
+++ b/xlators/cluster/dht/src/dht-selfheal.c
@@ -82,7 +82,7 @@ dht_selfheal_unlock_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
}
int
-dht_selfheal_dir_finish (call_frame_t *frame, xlator_t *this, int ret)
+dht_selfheal_dir_finish (call_frame_t *frame, xlator_t *this, int ret, int invoke_cbk)
{
dht_local_t *local = NULL, *lock_local = NULL;
call_frame_t *lock_frame = NULL;
@@ -90,7 +90,6 @@ dht_selfheal_dir_finish (call_frame_t *frame, xlator_t *this, int ret)
local = frame->local;
lock_count = dht_lock_count (local->lock.locks, local->lock.lk_count);
-
if (lock_count == 0)
goto done;
@@ -117,8 +116,9 @@ dht_selfheal_dir_finish (call_frame_t *frame, xlator_t *this, int ret)
lock_frame = NULL;
done:
- local->selfheal.dir_cbk (frame, NULL, frame->this, ret,
- local->op_errno, NULL);
+ if (!invoke_cbk)
+ local->selfheal.dir_cbk (frame, NULL, frame->this, ret,
+ local->op_errno, NULL);
if (lock_frame != NULL) {
DHT_STACK_DESTROY (lock_frame);
}
@@ -160,13 +160,13 @@ dht_refresh_layout_done (call_frame_t *frame)
dht_layout_unref (frame->this, heal);
- dht_selfheal_dir_finish (frame, frame->this, 0);
+ dht_selfheal_dir_finish (frame, frame->this, 0, 0);
}
return 0;
err:
- dht_selfheal_dir_finish (frame, frame->this, -1);
+ dht_selfheal_dir_finish (frame, frame->this, -1, 0);
return 0;
}
@@ -224,8 +224,9 @@ unlock:
return 0;
err:
- local->refresh_layout_unlock (frame, this, -1);
+ local->refresh_layout_unlock (frame, this, -1, 0);
+ dht_selfheal_dir_finish (frame, this, -1, 0);
return 0;
}
@@ -291,7 +292,8 @@ dht_refresh_layout (call_frame_t *frame)
return 0;
out:
- local->refresh_layout_unlock (frame, this, -1);
+ local->refresh_layout_unlock (frame, this, -1, 0);
+ dht_selfheal_dir_finish (frame, this, -1, 0);
return 0;
}
@@ -319,7 +321,7 @@ dht_selfheal_layout_lock_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
return 0;
err:
- dht_selfheal_dir_finish (frame, this, -1);
+ dht_selfheal_dir_finish (frame, this, -1, 0);
return 0;
}
@@ -580,7 +582,7 @@ dht_selfheal_layout_lock (call_frame_t *frame, dht_layout_t *layout,
local->lock.locks = lk_array;
local->lock.lk_count = count;
- ret = dht_blocking_inodelk (frame, lk_array, count,
+ ret = dht_blocking_inodelk (frame, lk_array, count, FAIL_ON_ANY_ERROR,
dht_selfheal_layout_lock_cbk);
if (ret < 0) {
local->lock.locks = NULL;
@@ -591,13 +593,7 @@ dht_selfheal_layout_lock (call_frame_t *frame, dht_layout_t *layout,
return 0;
err:
if (lk_array != NULL) {
- int tmp_count = 0, i = 0;
-
- for (i = 0; (i < count) && (lk_array[i]); i++, tmp_count++) {
- ;
- }
-
- dht_lock_array_free (lk_array, tmp_count);
+ dht_lock_array_free (lk_array, count);
GF_FREE (lk_array);
}
@@ -636,7 +632,7 @@ dht_selfheal_dir_xattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
this_call_cnt = dht_frame_return (frame);
if (is_last_call (this_call_cnt)) {
- dht_selfheal_dir_finish (frame, this, 0);
+ dht_selfheal_dir_finish (frame, this, 0, 0);
}
return 0;
@@ -831,7 +827,7 @@ dht_selfheal_dir_xattr (call_frame_t *frame, loc_t *loc, dht_layout_t *layout)
missing_xattr, loc->path);
if (missing_xattr == 0) {
- dht_selfheal_dir_finish (frame, this, 0);
+ dht_selfheal_dir_finish (frame, this, 0, 0);
return 0;
}
@@ -958,7 +954,7 @@ dht_selfheal_dir_xattr_for_nameless_lookup (call_frame_t *frame, loc_t *loc,
missing_xattr, loc->path);
if (missing_xattr == 0) {
- dht_selfheal_dir_finish (frame, this, 0);
+ dht_selfheal_dir_finish (frame, this, 0, 0);
return 0;
}
@@ -1026,7 +1022,7 @@ dht_selfheal_dir_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
dht_should_heal_layout);
if (ret < 0) {
- dht_selfheal_dir_finish (frame, this, -1);
+ dht_selfheal_dir_finish (frame, this, -1, 0);
}
}
@@ -1057,7 +1053,7 @@ dht_selfheal_dir_setattr (call_frame_t *frame, loc_t *loc, struct iatt *stbuf,
dht_should_heal_layout);
if (ret < 0) {
- dht_selfheal_dir_finish (frame, this, -1);
+ dht_selfheal_dir_finish (frame, this, -1, 0);
}
return 0;
@@ -1095,7 +1091,7 @@ dht_selfheal_dir_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
dht_layout_t *layout = NULL;
call_frame_t *prev = NULL;
xlator_t *subvol = NULL;
- int i = 0;
+ int i = 0, ret = -1;
int this_call_cnt = 0;
char gfid[GF_UUID_BUF_SIZE] = {0};
@@ -1114,7 +1110,6 @@ dht_selfheal_dir_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
}
if (op_ret) {
-
gf_uuid_unparse(local->loc.gfid, gfid);
gf_msg (this->name, ((op_errno == EEXIST) ? GF_LOG_DEBUG :
GF_LOG_WARNING),
@@ -1127,11 +1122,13 @@ dht_selfheal_dir_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
dht_iatt_merge (this, &local->stbuf, stbuf, prev->this);
dht_iatt_merge (this, &local->preparent, preparent, prev->this);
dht_iatt_merge (this, &local->postparent, postparent, prev->this);
+ ret = 0;
out:
this_call_cnt = dht_frame_return (frame);
if (is_last_call (this_call_cnt)) {
+ dht_selfheal_dir_finish (frame, this, ret, -1);
dht_selfheal_dir_setattr (frame, &local->loc, &local->stbuf, 0xffffff, layout);
}
@@ -1184,32 +1181,33 @@ out:
}
int
-dht_selfheal_dir_mkdir (call_frame_t *frame, loc_t *loc,
- dht_layout_t *layout, int force)
+dht_selfheal_dir_mkdir_lock_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- int missing_dirs = 0;
+ dht_local_t *local = NULL;
int i = 0;
int ret = -1;
- dht_local_t *local = NULL;
- xlator_t *this = NULL;
dict_t *dict = NULL;
+ dht_layout_t *layout = NULL;
+ loc_t *loc = NULL;
- local = frame->local;
- this = frame->this;
+ VALIDATE_OR_GOTO (this->private, err);
- local->selfheal.force_mkdir = force ? _gf_true : _gf_false;
+ local = frame->local;
+ layout = local->layout;
+ loc = &local->loc;
- for (i = 0; i < layout->cnt; i++) {
- if (layout->list[i].err == ENOENT || force)
- missing_dirs++;
- }
+ if (op_ret < 0) {
+ gf_msg (this->name, GF_LOG_WARNING, op_errno,
+ DHT_MSG_INODE_LK_ERROR,
+ "acquiring inodelk failed for %s",
+ loc->path);
- if (missing_dirs == 0) {
- dht_selfheal_dir_setattr (frame, loc, &local->stbuf, 0xffffffff, layout);
- return 0;
+ local->op_ret = -1;
+ local->op_errno = (op_errno == EAGAIN) ? EBUSY : op_errno;
+ goto err;
}
- local->call_cnt = missing_dirs;
if (!gf_uuid_is_null (local->gfid)) {
dict = dict_new ();
if (!dict)
@@ -1223,6 +1221,7 @@ dht_selfheal_dir_mkdir (call_frame_t *frame, loc_t *loc,
" key = gfid-req", loc->path);
} else if (local->params) {
/* Send the dictionary from higher layers directly */
+
dict = dict_ref (local->params);
}
/* Set acls */
@@ -1234,8 +1233,18 @@ dht_selfheal_dir_mkdir (call_frame_t *frame, loc_t *loc,
DHT_MSG_DICT_SET_FAILED,
"dict is NULL, need to make sure gfids are same");
+
+ /* We don't have to do a lookup here again:
+ 1) A parallel rmdir would have removed the directory, and locking
+ would then have failed with ESTALE on all subvols anyway. Hence
+ selfheal will never create the directory.
+ 2) A parallel lookup creating the directory does not have to be
+ mutually exclusive with the mkdir phase of lookup selfheal.
+ */
+
for (i = 0; i < layout->cnt; i++) {
- if (layout->list[i].err == ENOENT || force) {
+ if (layout->list[i].err == ENOENT ||
+ local->selfheal.force_mkdir) {
gf_msg_debug (this->name, 0,
"Creating directory %s on subvol %s",
loc->path, layout->list[i].xlator->name);
@@ -1254,6 +1263,82 @@ dht_selfheal_dir_mkdir (call_frame_t *frame, loc_t *loc,
dict_unref (dict);
return 0;
+
+err:
+ dht_selfheal_dir_finish (frame, this, -1, 0);
+ return 0;
+}
+/*
+ * Mkdir phase of directory selfheal.
+ *
+ * Counts the layout entries whose err is ENOENT (every entry when force
+ * is non-zero).  If there are none, dht_selfheal_dir_setattr() is called
+ * directly and 0 is returned.  Otherwise a blocking F_WRLCK inodelk in
+ * DHT_LAYOUT_HEAL_DOMAIN is requested on every subvolume of conf, with
+ * dht_selfheal_dir_mkdir_lock_cbk as the callback.
+ *
+ * Returns 0 once setattr or the lock request has been issued, and -1 if
+ * the lock array could not be built or the lock request could not be
+ * started.  The -1 path only frees the lock array here.
+ * NOTE(review): confirm that callers complete the selfheal on -1.
+ */
+int
+dht_selfheal_dir_mkdir (call_frame_t *frame, loc_t *loc,
+ dht_layout_t *layout, int force)
+{
+ int missing_dirs = 0;
+ int i = 0;
+ int ret = -1;
+ int count = 1;
+ dht_local_t *local = NULL;
+ dht_conf_t *conf = NULL;
+ xlator_t *this = NULL;
+ dht_lock_t **lk_array = NULL;
+
+ local = frame->local;
+ this = frame->this;
+ conf = this->private;
+
+ local->selfheal.force_mkdir = force ? _gf_true : _gf_false; /* read back by the lock callback in place of the force argument */
+
+ for (i = 0; i < layout->cnt; i++) {
+ if (layout->list[i].err == ENOENT || force)
+ missing_dirs++;
+ }
+
+ if (missing_dirs == 0) {
+ dht_selfheal_dir_setattr (frame, loc, &local->stbuf,
+ 0xffffffff, layout);
+ return 0;
+ }
+
+ local->call_cnt = missing_dirs;
+ count = conf->subvolume_cnt;
+
+ /* Locking on all subvols in the mkdir phase of lookup selfheal
+ is done to synchronize with rmdir/rename.
+ */
+ lk_array = GF_CALLOC (count, sizeof (*lk_array), gf_common_mt_char);
+ if (lk_array == NULL)
+ goto err;
+
+ for (i = 0; i < count; i++) {
+ lk_array[i] = dht_lock_new (frame->this,
+ conf->subvolumes[i],
+ &local->loc, F_WRLCK,
+ DHT_LAYOUT_HEAL_DOMAIN);
+ if (lk_array[i] == NULL)
+ goto err; /* entries after i are still NULL from GF_CALLOC */
+ }
+
+ local->lock.locks = lk_array;
+ local->lock.lk_count = count;
+
+ ret = dht_blocking_inodelk (frame, lk_array, count,
+ IGNORE_ENOENT_ESTALE,
+ dht_selfheal_dir_mkdir_lock_cbk);
+
+ if (ret < 0) {
+ local->lock.locks = NULL; /* drop the reference: err frees lk_array below */
+ local->lock.lk_count = 0;
+ goto err;
+ }
+
+ return 0;
+err:
+ if (lk_array != NULL) {
+ dht_lock_array_free (lk_array, count);
+ GF_FREE (lk_array);
+ }
+
+ return -1;
 }
int
@@ -1825,7 +1910,7 @@ dht_selfheal_directory (call_frame_t *frame, dht_selfheal_dir_cbk_t dir_cbk,
sorry_no_fix:
/* TODO: need to put appropriate local->op_errno */
- dht_selfheal_dir_finish (frame, this, ret);
+ dht_selfheal_dir_finish (frame, this, ret, 0);
return 0;
}
@@ -1893,7 +1978,7 @@ dht_selfheal_directory_for_nameless_lookup (call_frame_t *frame,
sorry_no_fix:
/* TODO: need to put appropriate local->op_errno */
- dht_selfheal_dir_finish (frame, this, ret);
+ dht_selfheal_dir_finish (frame, this, ret, 0);
return 0;
@@ -2244,7 +2329,7 @@ dht_update_commit_hash_for_layout (call_frame_t *frame)
local->lock.locks = lk_array;
local->lock.lk_count = count;
- ret = dht_blocking_inodelk (frame, lk_array, count,
+ ret = dht_blocking_inodelk (frame, lk_array, count, FAIL_ON_ANY_ERROR,
dht_update_commit_hash_for_layout_resume);
if (ret < 0) {
local->lock.locks = NULL;
@@ -2255,13 +2340,7 @@ dht_update_commit_hash_for_layout (call_frame_t *frame)
return 0;
err:
if (lk_array != NULL) {
- int tmp_count = 0, i = 0;
-
- for (i = 0; (i < count) && (lk_array[i]); i++, tmp_count++) {
- ;
- }
-
- dht_lock_array_free (lk_array, tmp_count);
+ dht_lock_array_free (lk_array, count);
GF_FREE (lk_array);
}
--
1.7.1