From fd75683b1e1334d60fbbca97f7d35dd50d61b404 Mon Sep 17 00:00:00 2001 From: Pranith Kumar K Date: Mon, 7 Nov 2016 14:47:34 +0530 Subject: [PATCH 195/206] cluster/afr: Fix bugs in [f]inodelk/[f]entrylk Problems: 1) Inodelk is not taking quorum into account 2) finodelk, [f]entrylk are not implemented correctly 3) By default afr doesn't go for non-blocking parallel locks. Fix: Implemented a common framework which can be used by [f]inodelk/[f]entrylk. Used quorum for the same. >Change-Id: I239f13875a065298630d266941df10cfa3addc85 >BUG: 1369077 >Signed-off-by: Pranith Kumar K >Reviewed-on: http://review.gluster.org/15802 >Tested-by: Krutika Dhananjay >Reviewed-by: Krutika Dhananjay >Smoke: Gluster Build System >Reviewed-by: Ravishankar N >CentOS-regression: Gluster Build System >NetBSD-regression: NetBSD Build System BUG: 1393694 Change-Id: If36907dad803b4774372036a54f8a034cd4155f5 Signed-off-by: Pranith Kumar K Reviewed-on: https://code.engineering.redhat.com/gerrit/91346 --- tests/basic/afr/inodelk.t | 87 ++++ xlators/cluster/afr/src/afr-common.c | 676 ++++++++++++++++-------------- xlators/cluster/afr/src/afr-lk-common.c | 2 +- xlators/cluster/afr/src/afr-messages.h | 4 +- xlators/cluster/afr/src/afr-transaction.c | 8 - xlators/cluster/afr/src/afr.h | 21 + 6 files changed, 483 insertions(+), 315 deletions(-) create mode 100644 tests/basic/afr/inodelk.t diff --git a/tests/basic/afr/inodelk.t b/tests/basic/afr/inodelk.t new file mode 100644 index 0000000..a32aa85 --- /dev/null +++ b/tests/basic/afr/inodelk.t @@ -0,0 +1,87 @@ +#!/bin/bash + +. $(dirname $0)/../../include.rc +. $(dirname $0)/../../volume.rc +cleanup; + +#This test tests that inodelk fails when quorum is not met. Also tests the +#success case where inodelk is obtained and unlocks are done correctly. 
+ +TEST glusterd; +TEST pidof glusterd + +TEST $CLI volume create $V0 replica 3 arbiter 1 $H0:$B0/${V0}{0..5} +TEST $CLI volume start $V0 +TEST $GFS -s $H0 --volfile-id=$V0 $M0 + +#Test success case +TEST mkdir $M0/dir1 +TEST mv $M0/dir1 $M0/dir2 + +#If there is a problem with inodelk unlocking the following would hang. +TEST mv $M0/dir2 $M0/dir1 + +#Test failure case by bringing two of the bricks down +#Test that the directory is not moved partially on some bricks but successful +#on other subvol where quorum meets. Do that for both set of bricks + +TEST kill_brick $V0 $H0 $B0/${V0}0 +TEST kill_brick $V0 $H0 $B0/${V0}1 +TEST ! mv $M0/dir1 $M0/dir2 + +TEST stat $B0/${V0}0/dir1 +TEST stat $B0/${V0}1/dir1 +TEST stat $B0/${V0}2/dir1 +TEST stat $B0/${V0}3/dir1 +TEST stat $B0/${V0}4/dir1 +TEST stat $B0/${V0}5/dir1 +TEST ! stat $B0/${V0}0/dir2 +TEST ! stat $B0/${V0}1/dir2 +TEST ! stat $B0/${V0}2/dir2 +TEST ! stat $B0/${V0}3/dir2 +TEST ! stat $B0/${V0}4/dir2 +TEST ! stat $B0/${V0}5/dir2 + +TEST $CLI volume start $V0 force +TEST kill_brick $V0 $H0 $B0/${V0}3 +TEST kill_brick $V0 $H0 $B0/${V0}4 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1 +TEST ! mv $M0/dir1 $M0/dir2 +TEST stat $B0/${V0}0/dir1 +TEST stat $B0/${V0}1/dir1 +TEST stat $B0/${V0}2/dir1 +TEST stat $B0/${V0}3/dir1 +TEST stat $B0/${V0}4/dir1 +TEST stat $B0/${V0}5/dir1 +TEST ! stat $B0/${V0}0/dir2 +TEST ! stat $B0/${V0}1/dir2 +TEST ! stat $B0/${V0}2/dir2 +TEST ! stat $B0/${V0}3/dir2 +TEST ! stat $B0/${V0}4/dir2 +TEST ! stat $B0/${V0}5/dir2 + +#Bring the bricks back up and try mv once more, it should succeed. 
+TEST $CLI volume start $V0 force +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 3 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 4 +TEST mv $M0/dir1 $M0/dir2 +cleanup; +#Do similar tests on replica 2 +TEST glusterd; +TEST pidof glusterd +TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0..3} +TEST $CLI volume start $V0 +TEST $GFS -s $H0 --volfile-id=$V0 $M0 +TEST mkdir $M0/dir1 +TEST mv $M0/dir1 $M0/dir2 +#Because we don't know hashed subvol, do the same test twice bringing 1 brick +#from each down, quorum calculation should allow it. +TEST kill_brick $V0 $H0 $B0/${V0}0 +TEST mv $M0/dir2 $M0/dir1 +TEST $CLI volume start $V0 force +TEST kill_brick $V0 $H0 $B0/${V0}2 +TEST mv $M0/dir1 $M0/dir2 +TEST kill_brick $V0 $H0 $B0/${V0}0 +TEST mv $M0/dir2 $M0/dir1 +cleanup diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c index dec6026..ab60406 100644 --- a/xlators/cluster/afr/src/afr-common.c +++ b/xlators/cluster/afr/src/afr-common.c @@ -45,6 +45,14 @@ #include "afr-messages.h" #include "compound-fop-utils.h" +int32_t +afr_quorum_errno (afr_private_t *priv) +{ + if (priv->quorum_reads) + return ENOTCONN; + return EROFS; +} + call_frame_t * afr_copy_frame (call_frame_t *base) { @@ -1558,6 +1566,29 @@ afr_remove_eager_lock_stub (afr_local_t *local) UNLOCK (&local->fd->lock); } +static gf_boolean_t +afr_fop_lock_is_unlock (call_frame_t *frame) +{ + afr_local_t *local = frame->local; + switch (local->op) { + case GF_FOP_INODELK: + case GF_FOP_FINODELK: + if ((F_UNLCK == local->cont.inodelk.in_flock.l_type) && + (local->cont.inodelk.in_cmd == F_SETLKW || + local->cont.inodelk.in_cmd == F_SETLK)) + return _gf_true; + break; + case GF_FOP_ENTRYLK: + case GF_FOP_FENTRYLK: + if (ENTRYLK_UNLOCK == local->cont.entrylk.in_cmd) + return _gf_true; + break; + default: + break; + } + return _gf_false; +} + void afr_local_cleanup (afr_local_t *local, xlator_t *this) { @@ -1681,6 +1712,15 @@ afr_local_cleanup 
(afr_local_t *local, xlator_t *this) { /* inodelk */ GF_FREE (local->cont.inodelk.volume); + if (local->cont.inodelk.xdata) + dict_unref (local->cont.inodelk.xdata); + } + + { /* entrylk */ + GF_FREE (local->cont.entrylk.volume); + GF_FREE (local->cont.entrylk.basename); + if (local->cont.entrylk.xdata) + dict_unref (local->cont.entrylk.xdata); } if (local->xdata_req) @@ -3242,10 +3282,96 @@ out: /* }}} */ -int32_t -afr_unlock_partial_inodelk_cbk (call_frame_t *frame, void *cookie, - xlator_t *this, int32_t op_ret, - int32_t op_errno, dict_t *xdata) +static int +afr_serialized_lock_wind (call_frame_t *frame, xlator_t *this); + +static gf_boolean_t +afr_is_conflicting_lock_present (int32_t op_ret, int32_t op_errno) +{ + if (op_ret == -1 && op_errno == EAGAIN) + return _gf_true; + return _gf_false; +} + +static void +afr_fop_lock_unwind (call_frame_t *frame, glusterfs_fop_t op, int32_t op_ret, + int32_t op_errno, dict_t *xdata) +{ + switch (op) { + case GF_FOP_INODELK: + AFR_STACK_UNWIND (inodelk, frame, op_ret, op_errno, xdata); + break; + case GF_FOP_FINODELK: + AFR_STACK_UNWIND (finodelk, frame, op_ret, op_errno, xdata); + break; + case GF_FOP_ENTRYLK: + AFR_STACK_UNWIND (entrylk, frame, op_ret, op_errno, xdata); + break; + case GF_FOP_FENTRYLK: + AFR_STACK_UNWIND (fentrylk, frame, op_ret, op_errno, xdata); + break; + default: + break; + } +} + +static void +afr_fop_lock_wind (call_frame_t *frame, xlator_t *this, int child_index, + int32_t (*lock_cbk) (call_frame_t *, void *, xlator_t *, + int32_t, int32_t, dict_t *)) +{ + afr_local_t *local = frame->local; + afr_private_t *priv = this->private; + int i = child_index; + + switch (local->op) { + case GF_FOP_INODELK: + STACK_WIND_COOKIE (frame, lock_cbk, (void *) (long) i, + priv->children[i], + priv->children[i]->fops->inodelk, + (const char *)local->cont.inodelk.volume, + &local->loc, local->cont.inodelk.cmd, + &local->cont.inodelk.flock, + local->cont.inodelk.xdata); + break; + case GF_FOP_FINODELK: + 
STACK_WIND_COOKIE (frame, lock_cbk, (void *) (long) i, + priv->children[i], + priv->children[i]->fops->finodelk, + (const char *)local->cont.inodelk.volume, + local->fd, local->cont.inodelk.cmd, + &local->cont.inodelk.flock, + local->cont.inodelk.xdata); + break; + case GF_FOP_ENTRYLK: + STACK_WIND_COOKIE (frame, lock_cbk, (void *) (long) i, + priv->children[i], + priv->children[i]->fops->entrylk, + local->cont.entrylk.volume, &local->loc, + local->cont.entrylk.basename, + local->cont.entrylk.cmd, + local->cont.entrylk.type, + local->cont.entrylk.xdata); + break; + case GF_FOP_FENTRYLK: + STACK_WIND_COOKIE (frame, lock_cbk, (void *) (long) i, + priv->children[i], + priv->children[i]->fops->fentrylk, + local->cont.entrylk.volume, local->fd, + local->cont.entrylk.basename, + local->cont.entrylk.cmd, + local->cont.entrylk.type, + local->cont.entrylk.xdata); + break; + default: + break; + } +} + +static int32_t +afr_unlock_partial_lock_cbk (call_frame_t *frame, void *cookie, + xlator_t *this, int32_t op_ret, + int32_t op_errno, dict_t *xdata) { afr_local_t *local = NULL; @@ -3258,28 +3384,78 @@ afr_unlock_partial_inodelk_cbk (call_frame_t *frame, void *cookie, priv = this->private; if (op_ret < 0 && op_errno != ENOTCONN) { - loc_gfid (&local->loc, gfid); - gf_msg (this->name, GF_LOG_ERROR, 0, - AFR_MSG_INODE_UNLOCK_FAIL, - "%s: Failed to unlock %s " - "with lk_owner: %s (%s)", uuid_utoa (gfid), + if (local->fd) + gf_uuid_copy (gfid, local->fd->inode->gfid); + else + loc_gfid (&local->loc, gfid); + gf_msg (this->name, GF_LOG_ERROR, op_errno, + AFR_MSG_UNLOCK_FAIL, + "%s: Failed to unlock %s on %s " + "with lk_owner: %s", uuid_utoa (gfid), + gf_fop_list[local->op], priv->children[child_index]->name, - lkowner_utoa (&frame->root->lk_owner), - strerror (op_errno)); + lkowner_utoa (&frame->root->lk_owner)); } call_count = afr_frame_return (frame); - if (call_count == 0) { - AFR_STACK_UNWIND (inodelk, frame, local->op_ret, - local->op_errno, local->xdata_rsp); - } + if 
(call_count) + goto out; + if (local->fop_lock_state != AFR_FOP_LOCK_PARALLEL) { + afr_fop_lock_unwind (frame, local->op, local->op_ret, + local->op_errno, local->xdata_rsp); + goto out; + } + /* At least one child is up. Parallel try conflicted: retry serially */ + /* + * Non-blocking locks also need to be serialized. Otherwise there is + * a chance that both the mounts which issued same non-blocking inodelk + * may end up not acquiring the lock on any brick. + * Ex: Mount1 and Mount2 + * request for full length lock on file f1. Mount1 afr may acquire the + * partial lock on brick-1 and may not acquire the lock on brick-2 + * because Mount2 already got the lock on brick-2, vice versa. Since + * both the mounts only got partial locks, afr treats them as failure in + * gaining the locks and unwinds with EAGAIN errno. + */ + local->op_ret = -1; + local->op_errno = EUCLEAN; + local->fop_lock_state = AFR_FOP_LOCK_SERIAL; + afr_local_replies_wipe (local, priv); + if (local->xdata_rsp) + dict_unref (local->xdata_rsp); + local->xdata_rsp = NULL; + switch (local->op) { + case GF_FOP_INODELK: + case GF_FOP_FINODELK: + local->cont.inodelk.cmd = local->cont.inodelk.in_cmd; + local->cont.inodelk.flock = local->cont.inodelk.in_flock; + if (local->cont.inodelk.xdata) + dict_unref (local->cont.inodelk.xdata); + local->cont.inodelk.xdata = NULL; + if (local->xdata_req) + local->cont.inodelk.xdata = dict_ref (local->xdata_req); + break; + case GF_FOP_ENTRYLK: + case GF_FOP_FENTRYLK: + local->cont.entrylk.cmd = local->cont.entrylk.in_cmd; + if (local->cont.entrylk.xdata) + dict_unref (local->cont.entrylk.xdata); + local->cont.entrylk.xdata = NULL; + if (local->xdata_req) + local->cont.entrylk.xdata = dict_ref (local->xdata_req); + break; + default: + break; + } + afr_serialized_lock_wind (frame, this); +out: return 0; } -int32_t -afr_unlock_inodelks_and_unwind (call_frame_t *frame, xlator_t *this, - int call_count) +static int32_t +afr_unlock_locks_and_proceed (call_frame_t *frame, xlator_t *this, 
+ int call_count) { int i = 
0; afr_private_t *priv = NULL; @@ -3288,7 +3464,25 @@ afr_unlock_inodelks_and_unwind (call_frame_t *frame, xlator_t *this, local = frame->local; priv = this->private; local->call_count = call_count; - local->cont.inodelk.flock.l_type = F_UNLCK; + switch (local->op) { + case GF_FOP_INODELK: + case GF_FOP_FINODELK: + local->cont.inodelk.flock.l_type = F_UNLCK; + local->cont.inodelk.cmd = F_SETLK; + if (local->cont.inodelk.xdata) + dict_unref (local->cont.inodelk.xdata); + local->cont.inodelk.xdata = NULL; + break; + case GF_FOP_ENTRYLK: + case GF_FOP_FENTRYLK: + local->cont.entrylk.cmd = ENTRYLK_UNLOCK; + if (local->cont.entrylk.xdata) + dict_unref (local->cont.entrylk.xdata); + local->cont.entrylk.xdata = NULL; + break; + default: + break; + } for (i = 0; i < priv->child_count; i++) { if (!local->replies[i].valid) @@ -3297,13 +3491,7 @@ afr_unlock_inodelks_and_unwind (call_frame_t *frame, xlator_t *this, if (local->replies[i].op_ret == -1) continue; - STACK_WIND_COOKIE (frame, afr_unlock_partial_inodelk_cbk, - (void*) (long) i, - priv->children[i], - priv->children[i]->fops->inodelk, - local->cont.inodelk.volume, - &local->loc, local->cont.inodelk.cmd, - &local->cont.inodelk.flock, 0); + afr_fop_lock_wind (frame, this, i, afr_unlock_partial_lock_cbk); if (!--call_count) break; @@ -3313,23 +3501,27 @@ afr_unlock_inodelks_and_unwind (call_frame_t *frame, xlator_t *this, } int32_t -afr_inodelk_done (call_frame_t *frame, xlator_t *this) +afr_fop_lock_done (call_frame_t *frame, xlator_t *this) { int i = 0; int lock_count = 0; + unsigned char *success = NULL; afr_local_t *local = NULL; afr_private_t *priv = NULL; local = frame->local; priv = this->private; + success = alloca0(priv->child_count); for (i = 0; i < priv->child_count; i++) { if (!local->replies[i].valid) continue; - if (local->replies[i].op_ret == 0) + if (local->replies[i].op_ret == 0) { lock_count++; + success[i] = 1; + } if (local->op_ret == -1 && local->op_errno == EAGAIN) continue; @@ -3347,20 +3539,29 @@ 
afr_inodelk_done (call_frame_t *frame, xlator_t *this) local->op_errno = local->replies[i].op_errno; } - if (lock_count && local->cont.inodelk.flock.l_type != F_UNLCK && - (local->op_ret == -1 && local->op_errno == EAGAIN)) { - afr_unlock_inodelks_and_unwind (frame, this, - lock_count); + if (afr_fop_lock_is_unlock (frame) || (lock_count == 0)) + goto unwind; + + if (afr_is_conflicting_lock_present (local->op_ret, local->op_errno)) { + afr_unlock_locks_and_proceed (frame, this, lock_count); + } else if (priv->quorum_count && !afr_has_quorum (success, this)) { + local->fop_lock_state = AFR_FOP_LOCK_QUORUM_FAILED; + local->op_ret = -1; + local->op_errno = afr_quorum_errno (priv); + afr_unlock_locks_and_proceed (frame, this, lock_count); } else { - AFR_STACK_UNWIND (inodelk, frame, local->op_ret, - local->op_errno, local->xdata_rsp); + goto unwind; } return 0; +unwind: + afr_fop_lock_unwind (frame, local->op, local->op_ret, + local->op_errno, local->xdata_rsp); + return 0; } -int -afr_common_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this, +static int +afr_common_lock_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, dict_t *xdata) { afr_local_t *local = NULL; @@ -3384,32 +3585,8 @@ afr_common_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this, } static int32_t -afr_parallel_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *xdata) - -{ - int call_count = 0; - - afr_common_inodelk_cbk (frame, cookie, this, op_ret, op_errno, xdata); - - call_count = afr_frame_return (frame); - if (call_count == 0) - afr_inodelk_done (frame, this); - - return 0; -} - -static gf_boolean_t -afr_is_conflicting_lock_present (int32_t op_ret, int32_t op_errno) -{ - if (op_ret == -1 && op_errno == EAGAIN) - return _gf_true; - return _gf_false; -} - -static int32_t -afr_serialized_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t 
op_errno, dict_t *xdata) +afr_serialized_lock_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) { afr_local_t *local = NULL; @@ -3420,7 +3597,7 @@ afr_serialized_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this, local = frame->local; priv = this->private; - afr_common_inodelk_cbk (frame, cookie, this, op_ret, op_errno, xdata); + afr_common_lock_cbk (frame, cookie, this, op_ret, op_errno, xdata); for (next_child = child_index + 1; next_child < priv->child_count; next_child++) { @@ -3430,80 +3607,117 @@ afr_serialized_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this, if (afr_is_conflicting_lock_present (op_ret, op_errno) || (next_child == priv->child_count)) { - afr_inodelk_done (frame, this); + afr_fop_lock_done (frame, this); } else { - STACK_WIND_COOKIE (frame, afr_serialized_inodelk_cbk, - (void *) (long) next_child, - priv->children[next_child], - priv->children[next_child]->fops->inodelk, - (const char *)local->cont.inodelk.volume, - &local->loc, local->cont.inodelk.cmd, - &local->cont.inodelk.flock, - local->xdata_req); + afr_fop_lock_wind (frame, this, next_child, + afr_serialized_lock_cbk); } return 0; } static int -afr_parallel_inodelk_wind (call_frame_t *frame, xlator_t *this) +afr_serialized_lock_wind (call_frame_t *frame, xlator_t *this) { afr_private_t *priv = NULL; afr_local_t *local = NULL; - int call_count = 0; int i = 0; priv = this->private; local = frame->local; - call_count = local->call_count; for (i = 0; i < priv->child_count; i++) { - if (!local->child_up[i]) - continue; - STACK_WIND_COOKIE (frame, afr_parallel_inodelk_cbk, - (void *) (long) i, - priv->children[i], - priv->children[i]->fops->inodelk, - (const char *)local->cont.inodelk.volume, - &local->loc, local->cont.inodelk.cmd, - &local->cont.inodelk.flock, - local->xdata_req); - if (!--call_count) + if (local->child_up[i]) { + afr_fop_lock_wind (frame, this, i, + afr_serialized_lock_cbk); break; + } } 
return 0; } +static int32_t +afr_parallel_lock_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) + +{ + int call_count = 0; + + afr_common_lock_cbk (frame, cookie, this, op_ret, op_errno, xdata); + + call_count = afr_frame_return (frame); + if (call_count == 0) + afr_fop_lock_done (frame, this); + + return 0; +} + static int -afr_serialized_inodelk_wind (call_frame_t *frame, xlator_t *this) +afr_parallel_lock_wind (call_frame_t *frame, xlator_t *this) { afr_private_t *priv = NULL; afr_local_t *local = NULL; + int call_count = 0; int i = 0; priv = this->private; local = frame->local; + call_count = local->call_count; for (i = 0; i < priv->child_count; i++) { - if (local->child_up[i]) { - STACK_WIND_COOKIE (frame, afr_serialized_inodelk_cbk, - (void *) (long) i, - priv->children[i], - priv->children[i]->fops->inodelk, - (const char *)local->cont.inodelk.volume, - &local->loc, local->cont.inodelk.cmd, - &local->cont.inodelk.flock, - local->xdata_req); + if (!local->child_up[i]) + continue; + afr_fop_lock_wind (frame, this, i, afr_parallel_lock_cbk); + if (!--call_count) + break; + } + return 0; +} + +static int +afr_fop_handle_lock (call_frame_t *frame, xlator_t *this) +{ + afr_local_t *local = frame->local; + + if (!afr_fop_lock_is_unlock (frame)) { + switch (local->op) { + case GF_FOP_INODELK: + case GF_FOP_FINODELK: + local->cont.inodelk.cmd = F_SETLK; + break; + case GF_FOP_ENTRYLK: + case GF_FOP_FENTRYLK: + local->cont.entrylk.cmd = ENTRYLK_LOCK_NB; + break; + default: break; } } + + if (local->xdata_req) { + switch (local->op) { + case GF_FOP_INODELK: + case GF_FOP_FINODELK: + local->cont.inodelk.xdata = dict_ref (local->xdata_req); + break; + case GF_FOP_ENTRYLK: + case GF_FOP_FENTRYLK: + local->cont.entrylk.xdata = dict_ref (local->xdata_req); + break; + default: + break; + } + } + + local->fop_lock_state = AFR_FOP_LOCK_PARALLEL; + afr_parallel_lock_wind (frame, this); return 0; } -int32_t -afr_inodelk 
(call_frame_t *frame, xlator_t *this, - const char *volume, loc_t *loc, int32_t cmd, - struct gf_flock *flock, dict_t *xdata) +static int32_t +afr_handle_inodelk (call_frame_t *frame, glusterfs_fop_t fop, + const char *volume, loc_t *loc, fd_t *fd, int32_t cmd, + struct gf_flock *flock, dict_t *xdata) { afr_local_t *local = NULL; int32_t op_errno = ENOMEM; @@ -3512,259 +3726,113 @@ afr_inodelk (call_frame_t *frame, xlator_t *this, if (!local) goto out; - loc_copy (&local->loc, loc); + local->op = fop; + if (loc) + loc_copy (&local->loc, loc); + if (fd) + local->fd = fd_ref (fd); + local->cont.inodelk.volume = gf_strdup (volume); if (!local->cont.inodelk.volume) { op_errno = ENOMEM; goto out; } + local->cont.inodelk.in_cmd = cmd; local->cont.inodelk.cmd = cmd; + local->cont.inodelk.in_flock = *flock; local->cont.inodelk.flock = *flock; if (xdata) local->xdata_req = dict_ref (xdata); - /* At least one child is up */ - /* - * Non-blocking locks also need to be serialized. Otherwise there is - * a chance that both the mounts which issued same non-blocking inodelk - * may endup not acquiring the lock on any-brick. - * Ex: Mount1 and Mount2 - * request for full length lock on file f1. Mount1 afr may acquire the - * partial lock on brick-1 and may not acquire the lock on brick-2 - * because Mount2 already got the lock on brick-2, vice versa. Since - * both the mounts only got partial locks, afr treats them as failure in - * gaining the locks and unwinds with EAGAIN errno. 
- */ - if (flock->l_type == F_UNLCK) { - afr_parallel_inodelk_wind (frame, this); - } else { - afr_serialized_inodelk_wind (frame, this); - } - - return 0; + op_errno = -afr_fop_handle_lock (frame, frame->this); + if (op_errno) + goto out; + return 0; out: - AFR_STACK_UNWIND (inodelk, frame, -1, op_errno, NULL); + afr_fop_lock_unwind (frame, fop, -1, op_errno, NULL); return 0; } - int32_t -afr_finodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *xdata) - +afr_inodelk (call_frame_t *frame, xlator_t *this, + const char *volume, loc_t *loc, int32_t cmd, + struct gf_flock *flock, dict_t *xdata) { - afr_local_t *local = NULL; - int call_count = -1; - - local = frame->local; - - LOCK (&frame->lock); - { - if (op_ret == 0) - local->op_ret = 0; - - local->op_errno = op_errno; - } - UNLOCK (&frame->lock); - - call_count = afr_frame_return (frame); - - if (call_count == 0) - AFR_STACK_UNWIND (finodelk, frame, local->op_ret, - local->op_errno, xdata); - + afr_handle_inodelk (frame, GF_FOP_INODELK, volume, loc, NULL, cmd, + flock, xdata); return 0; } - int32_t afr_finodelk (call_frame_t *frame, xlator_t *this, const char *volume, fd_t *fd, - int32_t cmd, struct gf_flock *flock, dict_t *xdata) -{ - afr_private_t *priv = NULL; - afr_local_t *local = NULL; - int i = 0; - int32_t call_count = 0; - int32_t op_errno = ENOMEM; - - priv = this->private; - - local = AFR_FRAME_INIT (frame, op_errno); - if (!local) - goto out; - - call_count = local->call_count; - if (!call_count) { - op_errno = ENOTCONN; - goto out; - } - - for (i = 0; i < priv->child_count; i++) { - if (local->child_up[i]) { - STACK_WIND (frame, afr_finodelk_cbk, - priv->children[i], - priv->children[i]->fops->finodelk, - volume, fd, cmd, flock, xdata); - - if (!--call_count) - break; - } - } - - return 0; -out: - AFR_STACK_UNWIND (finodelk, frame, -1, op_errno, NULL); - - return 0; -} - - -int32_t -afr_entrylk_cbk (call_frame_t *frame, void *cookie, xlator_t 
*this, - int32_t op_ret, int32_t op_errno, dict_t *xdata) + int32_t cmd, struct gf_flock *flock, dict_t *xdata) { - afr_local_t *local = NULL; - int call_count = -1; - - local = frame->local; - - LOCK (&frame->lock); - { - if (op_ret == 0) - local->op_ret = 0; - - local->op_errno = op_errno; - } - UNLOCK (&frame->lock); - - call_count = afr_frame_return (frame); - - if (call_count == 0) - AFR_STACK_UNWIND (entrylk, frame, local->op_ret, - local->op_errno, xdata); - + afr_handle_inodelk (frame, GF_FOP_FINODELK, volume, NULL, fd, cmd, + flock, xdata); return 0; } - -int -afr_entrylk (call_frame_t *frame, xlator_t *this, const char *volume, - loc_t *loc, const char *basename, entrylk_cmd cmd, - entrylk_type type, dict_t *xdata) +static int +afr_handle_entrylk (call_frame_t *frame, glusterfs_fop_t fop, + const char *volume, loc_t *loc, fd_t *fd, + const char *basename, entrylk_cmd cmd, + entrylk_type type, dict_t *xdata) { - afr_private_t *priv = NULL; afr_local_t *local = NULL; - int i = 0; - int32_t call_count = 0; - int32_t op_errno = 0; - - priv = this->private; - - local = AFR_FRAME_INIT (frame, op_errno); - if (!local) - goto out; - - call_count = local->call_count; - if (!call_count) { - op_errno = ENOTCONN; - goto out; - } + int32_t op_errno = ENOMEM; - for (i = 0; i < priv->child_count; i++) { - if (local->child_up[i]) { - STACK_WIND (frame, afr_entrylk_cbk, - priv->children[i], - priv->children[i]->fops->entrylk, - volume, loc, basename, cmd, type, xdata); + local = AFR_FRAME_INIT (frame, op_errno); + if (!local) + goto out; - if (!--call_count) - break; - } + local->op = fop; + if (loc) + loc_copy (&local->loc, loc); + if (fd) + local->fd = fd_ref (fd); + local->cont.entrylk.cmd = cmd; + local->cont.entrylk.in_cmd = cmd; + local->cont.entrylk.type = type; + local->cont.entrylk.volume = gf_strdup (volume); + local->cont.entrylk.basename = basename ? gf_strdup (basename) : NULL; + if (!local->cont.entrylk.volume || (basename && !local->cont.entrylk.basename)) { + op_errno = ENOMEM; + goto 
out; } + if (xdata) + local->xdata_req = dict_ref (xdata); + op_errno = -afr_fop_handle_lock (frame, frame->this); + if (op_errno) + goto out; - return 0; + return 0; out: - AFR_STACK_UNWIND (entrylk, frame, -1, op_errno, NULL); - + afr_fop_lock_unwind (frame, fop, -1, op_errno, NULL); return 0; } - - int -afr_fentrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *xdata) - +afr_entrylk (call_frame_t *frame, xlator_t *this, const char *volume, + loc_t *loc, const char *basename, entrylk_cmd cmd, + entrylk_type type, dict_t *xdata) { - afr_local_t *local = NULL; - int call_count = -1; - - local = frame->local; - - LOCK (&frame->lock); - { - if (op_ret == 0) - local->op_ret = 0; - - local->op_errno = op_errno; - } - UNLOCK (&frame->lock); - - call_count = afr_frame_return (frame); - - if (call_count == 0) - AFR_STACK_UNWIND (fentrylk, frame, local->op_ret, - local->op_errno, xdata); - + afr_handle_entrylk (frame, GF_FOP_ENTRYLK, volume, loc, NULL, basename, + cmd, type, xdata); return 0; } - int afr_fentrylk (call_frame_t *frame, xlator_t *this, const char *volume, fd_t *fd, const char *basename, entrylk_cmd cmd, entrylk_type type, - dict_t *xdata) + dict_t *xdata) { - afr_private_t *priv = NULL; - afr_local_t *local = NULL; - int i = 0; - int32_t call_count = 0; - int32_t op_errno = ENOMEM; - - priv = this->private; - - local = AFR_FRAME_INIT (frame, op_errno); - if (!local) - goto out; - - call_count = local->call_count; - if (!call_count) { - op_errno = ENOTCONN; - goto out; - } - - for (i = 0; i < priv->child_count; i++) { - if (local->child_up[i]) { - STACK_WIND (frame, afr_fentrylk_cbk, - priv->children[i], - priv->children[i]->fops->fentrylk, - volume, fd, basename, cmd, type, xdata); - - if (!--call_count) - break; - } - } - - return 0; -out: - AFR_STACK_UNWIND (fentrylk, frame, -1, op_errno, NULL); - + afr_handle_entrylk (frame, GF_FOP_FENTRYLK, volume, NULL, fd, basename, + cmd, type, xdata); return 0; } 
- int afr_statfs_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, int op_errno, struct statvfs *statvfs, dict_t *xdata) diff --git a/xlators/cluster/afr/src/afr-lk-common.c b/xlators/cluster/afr/src/afr-lk-common.c index 0bd9ffe..6019454 100644 --- a/xlators/cluster/afr/src/afr-lk-common.c +++ b/xlators/cluster/afr/src/afr-lk-common.c @@ -669,7 +669,7 @@ afr_unlock_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this, if (op_ret < 0 && op_errno != ENOTCONN && op_errno != EBADFD) { gf_msg (this->name, GF_LOG_ERROR, op_errno, - AFR_MSG_INODE_UNLOCK_FAIL, + AFR_MSG_UNLOCK_FAIL, "path=%s gfid=%s: unlock failed on subvolume %s " "with lock owner %s", local->loc.path, loc_gfid_utoa (&(local->loc)), diff --git a/xlators/cluster/afr/src/afr-messages.h b/xlators/cluster/afr/src/afr-messages.h index c7af18d..00e689b 100644 --- a/xlators/cluster/afr/src/afr-messages.h +++ b/xlators/cluster/afr/src/afr-messages.h @@ -130,11 +130,11 @@ /*! * @messageid 108010 - * @diagnosis Inode unlocks failed on a brick. + * @diagnosis unlocks failed on a brick. * @recommendedaction Error number in the log should give the reason why it * failed. Also observe brick logs for more information. */ -#define AFR_MSG_INODE_UNLOCK_FAIL (GLFS_COMP_BASE_AFR + 10) +#define AFR_MSG_UNLOCK_FAIL (GLFS_COMP_BASE_AFR + 10) /*! 
* @messageid 108011 diff --git a/xlators/cluster/afr/src/afr-transaction.c b/xlators/cluster/afr/src/afr-transaction.c index bff8226..1ed327c 100644 --- a/xlators/cluster/afr/src/afr-transaction.c +++ b/xlators/cluster/afr/src/afr-transaction.c @@ -136,14 +136,6 @@ afr_needs_changelog_update (afr_local_t *local) return _gf_false; } -static int32_t -afr_quorum_errno (afr_private_t *priv) -{ - if (priv->quorum_reads) - return ENOTCONN; - return EROFS; -} - int __afr_txn_write_fop (call_frame_t *frame, xlator_t *this) { diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h index 17b997e..a9cab13 100644 --- a/xlators/cluster/afr/src/afr.h +++ b/xlators/cluster/afr/src/afr.h @@ -348,6 +348,11 @@ typedef struct { int readdir_subvol; } afr_fd_ctx_t; +typedef enum { + AFR_FOP_LOCK_PARALLEL, + AFR_FOP_LOCK_SERIAL, + AFR_FOP_LOCK_QUORUM_FAILED, +} afr_fop_lock_state_t; typedef struct _afr_local { glusterfs_fop_t op; @@ -665,10 +670,22 @@ typedef struct _afr_local { struct { char *volume; int32_t cmd; + int32_t in_cmd; + struct gf_flock in_flock; struct gf_flock flock; + void *xdata; } inodelk; struct { + char *volume; + char *basename; + entrylk_cmd in_cmd; + entrylk_cmd cmd; + entrylk_type type; + void *xdata; + } entrylk; + + struct { off_t offset; gf_seek_what_t what; } seek; @@ -788,6 +805,7 @@ typedef struct _afr_local { gf_boolean_t need_full_crawl; gf_boolean_t compound; + afr_fop_lock_state_t fop_lock_state; } afr_local_t; @@ -1200,4 +1218,7 @@ void afr_compound_cleanup (compound_args_t *args, dict_t *xdata, dict_t *newloc_xdata); +int32_t +afr_quorum_errno (afr_private_t *priv); + #endif /* __AFR_H__ */ -- 2.9.3