From fd75683b1e1334d60fbbca97f7d35dd50d61b404 Mon Sep 17 00:00:00 2001
From: Pranith Kumar K <pkarampu@redhat.com>
Date: Mon, 7 Nov 2016 14:47:34 +0530
Subject: [PATCH 195/206] cluster/afr: Fix bugs in [f]inodelk/[f]entrylk
Problems:
1) Inodelk is not taking quorum into account
2) finodelk, [f]entrylk are not implemented correctly
3) By default afr doesn't go for non-blocking parallel locks.
Fix:
Implemented a common framework which can be used by
[f]inodelk/[f]entrylk. Used quorum for the same.
>Change-Id: I239f13875a065298630d266941df10cfa3addc85
>BUG: 1369077
>Signed-off-by: Pranith Kumar K <pkarampu@redhat.com>
>Reviewed-on: http://review.gluster.org/15802
>Tested-by: Krutika Dhananjay <kdhananj@redhat.com>
>Reviewed-by: Krutika Dhananjay <kdhananj@redhat.com>
>Smoke: Gluster Build System <jenkins@build.gluster.org>
>Reviewed-by: Ravishankar N <ravishankar@redhat.com>
>CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
>NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
BUG: 1393694
Change-Id: If36907dad803b4774372036a54f8a034cd4155f5
Signed-off-by: Pranith Kumar K <pkarampu@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/91346
---
tests/basic/afr/inodelk.t | 87 ++++
xlators/cluster/afr/src/afr-common.c | 676 ++++++++++++++++--------------
xlators/cluster/afr/src/afr-lk-common.c | 2 +-
xlators/cluster/afr/src/afr-messages.h | 4 +-
xlators/cluster/afr/src/afr-transaction.c | 8 -
xlators/cluster/afr/src/afr.h | 21 +
6 files changed, 483 insertions(+), 315 deletions(-)
create mode 100644 tests/basic/afr/inodelk.t
diff --git a/tests/basic/afr/inodelk.t b/tests/basic/afr/inodelk.t
new file mode 100644
index 0000000..a32aa85
--- /dev/null
+++ b/tests/basic/afr/inodelk.t
@@ -0,0 +1,87 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+cleanup;
+
+#This test tests that inodelk fails when quorum is not met. Also tests the
+#success case where inodelk is obtained and unlocks are done correctly.
+
+TEST glusterd;
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 replica 3 arbiter 1 $H0:$B0/${V0}{0..5}
+TEST $CLI volume start $V0
+TEST $GFS -s $H0 --volfile-id=$V0 $M0
+
+#Test success case
+TEST mkdir $M0/dir1
+TEST mv $M0/dir1 $M0/dir2
+
+#If there is a problem with inodelk unlocking the following would hang.
+TEST mv $M0/dir2 $M0/dir1
+
+#Test the failure case by bringing two of the bricks down.
+#Check that the directory is not moved partially, i.e. renamed on the subvol
+#where quorum is met but not on the other. Do that for both sets of bricks.
+
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST kill_brick $V0 $H0 $B0/${V0}1
+TEST ! mv $M0/dir1 $M0/dir2
+
+TEST stat $B0/${V0}0/dir1
+TEST stat $B0/${V0}1/dir1
+TEST stat $B0/${V0}2/dir1
+TEST stat $B0/${V0}3/dir1
+TEST stat $B0/${V0}4/dir1
+TEST stat $B0/${V0}5/dir1
+TEST ! stat $B0/${V0}0/dir2
+TEST ! stat $B0/${V0}1/dir2
+TEST ! stat $B0/${V0}2/dir2
+TEST ! stat $B0/${V0}3/dir2
+TEST ! stat $B0/${V0}4/dir2
+TEST ! stat $B0/${V0}5/dir2
+
+TEST $CLI volume start $V0 force
+TEST kill_brick $V0 $H0 $B0/${V0}3
+TEST kill_brick $V0 $H0 $B0/${V0}4
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+TEST ! mv $M0/dir1 $M0/dir2
+TEST stat $B0/${V0}0/dir1
+TEST stat $B0/${V0}1/dir1
+TEST stat $B0/${V0}2/dir1
+TEST stat $B0/${V0}3/dir1
+TEST stat $B0/${V0}4/dir1
+TEST stat $B0/${V0}5/dir1
+TEST ! stat $B0/${V0}0/dir2
+TEST ! stat $B0/${V0}1/dir2
+TEST ! stat $B0/${V0}2/dir2
+TEST ! stat $B0/${V0}3/dir2
+TEST ! stat $B0/${V0}4/dir2
+TEST ! stat $B0/${V0}5/dir2
+
+#Bring the bricks back up and try mv once more, it should succeed.
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 3
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 4
+TEST mv $M0/dir1 $M0/dir2
+cleanup;
+#Do similar tests on replica 2
+TEST glusterd;
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0..3}
+TEST $CLI volume start $V0
+TEST $GFS -s $H0 --volfile-id=$V0 $M0
+TEST mkdir $M0/dir1
+TEST mv $M0/dir1 $M0/dir2
+#Because we don't know the hashed subvol, do the same test twice, bringing one
+#brick of each replica pair down; quorum calculation should allow it.
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST mv $M0/dir2 $M0/dir1
+TEST $CLI volume start $V0 force
+TEST kill_brick $V0 $H0 $B0/${V0}2
+TEST mv $M0/dir1 $M0/dir2
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST mv $M0/dir2 $M0/dir1
+cleanup
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
index dec6026..ab60406 100644
--- a/xlators/cluster/afr/src/afr-common.c
+++ b/xlators/cluster/afr/src/afr-common.c
@@ -45,6 +45,14 @@
#include "afr-messages.h"
#include "compound-fop-utils.h"
+/*
+ * Select the errno used when a fop fails the quorum check:
+ * ENOTCONN if priv->quorum_reads is set, EROFS otherwise.
+ */
+int32_t
+afr_quorum_errno (afr_private_t *priv)
+{
+        return priv->quorum_reads ? ENOTCONN : EROFS;
+}
+
call_frame_t *
afr_copy_frame (call_frame_t *base)
{
@@ -1558,6 +1566,29 @@ afr_remove_eager_lock_stub (afr_local_t *local)
UNLOCK (&local->fd->lock);
}
+/*
+ * Report whether the lock fop held in frame->local is an unlock request.
+ * The decision is based on the caller's original arguments (in_cmd and
+ * in_flock), not on the cmd/flock copies that are rewritten while winding.
+ */
+static gf_boolean_t
+afr_fop_lock_is_unlock (call_frame_t *frame)
+{
+        afr_local_t  *local     = frame->local;
+        gf_boolean_t  is_unlock = _gf_false;
+
+        switch (local->op) {
+        case GF_FOP_INODELK:
+        case GF_FOP_FINODELK:
+                if (local->cont.inodelk.in_flock.l_type != F_UNLCK)
+                        break;
+                if (local->cont.inodelk.in_cmd == F_SETLKW ||
+                    local->cont.inodelk.in_cmd == F_SETLK)
+                        is_unlock = _gf_true;
+                break;
+        case GF_FOP_ENTRYLK:
+        case GF_FOP_FENTRYLK:
+                if (local->cont.entrylk.in_cmd == ENTRYLK_UNLOCK)
+                        is_unlock = _gf_true;
+                break;
+        default:
+                break;
+        }
+
+        return is_unlock;
+}
+
void
afr_local_cleanup (afr_local_t *local, xlator_t *this)
{
@@ -1681,6 +1712,15 @@ afr_local_cleanup (afr_local_t *local, xlator_t *this)
{ /* inodelk */
GF_FREE (local->cont.inodelk.volume);
+ if (local->cont.inodelk.xdata)
+ dict_unref (local->cont.inodelk.xdata);
+ }
+
+ { /* entrylk */
+ GF_FREE (local->cont.entrylk.volume);
+ GF_FREE (local->cont.entrylk.basename);
+ if (local->cont.entrylk.xdata)
+ dict_unref (local->cont.entrylk.xdata);
}
if (local->xdata_req)
@@ -3242,10 +3282,96 @@ out:
/* }}} */
-int32_t
-afr_unlock_partial_inodelk_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret,
- int32_t op_errno, dict_t *xdata)
+static int
+afr_serialized_lock_wind (call_frame_t *frame, xlator_t *this);
+
+/* A lock reply of -1/EAGAIN is what this file treats as a conflicting lock. */
+static gf_boolean_t
+afr_is_conflicting_lock_present (int32_t op_ret, int32_t op_errno)
+{
+        return (op_ret == -1 && op_errno == EAGAIN) ? _gf_true : _gf_false;
+}
+
+/*
+ * Unwind frame with the AFR_STACK_UNWIND variant that matches op
+ * ([f]inodelk or [f]entrylk). Any other fop value is ignored.
+ */
+static void
+afr_fop_lock_unwind (call_frame_t *frame, glusterfs_fop_t op, int32_t op_ret,
+                     int32_t op_errno, dict_t *xdata)
+{
+        if (op == GF_FOP_INODELK) {
+                AFR_STACK_UNWIND (inodelk, frame, op_ret, op_errno, xdata);
+        } else if (op == GF_FOP_FINODELK) {
+                AFR_STACK_UNWIND (finodelk, frame, op_ret, op_errno, xdata);
+        } else if (op == GF_FOP_ENTRYLK) {
+                AFR_STACK_UNWIND (entrylk, frame, op_ret, op_errno, xdata);
+        } else if (op == GF_FOP_FENTRYLK) {
+                AFR_STACK_UNWIND (fentrylk, frame, op_ret, op_errno, xdata);
+        }
+}
+
+static void
+afr_fop_lock_wind (call_frame_t *frame, xlator_t *this, int child_index,
+ int32_t (*lock_cbk) (call_frame_t *, void *, xlator_t *,
+ int32_t, int32_t, dict_t *))
+{ /* Winds the lock fop selected by local->op to priv->children[child_index]; the child index is passed as the cookie and lock_cbk is the callback. */
+ afr_local_t *local = frame->local;
+ afr_private_t *priv = this->private;
+ int i = child_index;
+
+ switch (local->op) {
+ case GF_FOP_INODELK: /* path-based inode lock: uses &local->loc */
+ STACK_WIND_COOKIE (frame, lock_cbk, (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->inodelk,
+ (const char *)local->cont.inodelk.volume,
+ &local->loc, local->cont.inodelk.cmd,
+ &local->cont.inodelk.flock,
+ local->cont.inodelk.xdata);
+ break;
+ case GF_FOP_FINODELK: /* fd-based inode lock: same arguments, but local->fd instead of &local->loc */
+ STACK_WIND_COOKIE (frame, lock_cbk, (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->finodelk,
+ (const char *)local->cont.inodelk.volume,
+ local->fd, local->cont.inodelk.cmd,
+ &local->cont.inodelk.flock,
+ local->cont.inodelk.xdata);
+ break;
+ case GF_FOP_ENTRYLK: /* path-based entry lock: uses &local->loc and the saved basename/cmd/type */
+ STACK_WIND_COOKIE (frame, lock_cbk, (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->entrylk,
+ local->cont.entrylk.volume, &local->loc,
+ local->cont.entrylk.basename,
+ local->cont.entrylk.cmd,
+ local->cont.entrylk.type,
+ local->cont.entrylk.xdata);
+ break;
+ case GF_FOP_FENTRYLK: /* fd-based entry lock: same arguments, but local->fd instead of &local->loc */
+ STACK_WIND_COOKIE (frame, lock_cbk, (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->fentrylk,
+ local->cont.entrylk.volume, local->fd,
+ local->cont.entrylk.basename,
+ local->cont.entrylk.cmd,
+ local->cont.entrylk.type,
+ local->cont.entrylk.xdata);
+ break;
+ default: /* any other fop: nothing is wound */
+ break;
+ }
+}
+
+static int32_t
+afr_unlock_partial_lock_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, dict_t *xdata)
{
afr_local_t *local = NULL;
@@ -3258,28 +3384,78 @@ afr_unlock_partial_inodelk_cbk (call_frame_t *frame, void *cookie,
priv = this->private;
if (op_ret < 0 && op_errno != ENOTCONN) {
- loc_gfid (&local->loc, gfid);
- gf_msg (this->name, GF_LOG_ERROR, 0,
- AFR_MSG_INODE_UNLOCK_FAIL,
- "%s: Failed to unlock %s "
- "with lk_owner: %s (%s)", uuid_utoa (gfid),
+ if (local->fd)
+ gf_uuid_copy (gfid, local->fd->inode->gfid);
+ else
+ loc_gfid (&local->loc, gfid);
+ gf_msg (this->name, GF_LOG_ERROR, op_errno,
+ AFR_MSG_UNLOCK_FAIL,
+ "%s: Failed to unlock %s on %s "
+ "with lk_owner: %s", uuid_utoa (gfid),
+ gf_fop_list[local->op],
priv->children[child_index]->name,
- lkowner_utoa (&frame->root->lk_owner),
- strerror (op_errno));
+ lkowner_utoa (&frame->root->lk_owner));
}
call_count = afr_frame_return (frame);
- if (call_count == 0) {
- AFR_STACK_UNWIND (inodelk, frame, local->op_ret,
- local->op_errno, local->xdata_rsp);
- }
+        if (call_count)
+                goto out;
+
+        /*
+         * Every partial unlock has returned. Unless this unlock round
+         * followed the initial parallel attempt, unwind with the result
+         * already stored in local (serialized conflict or quorum failure).
+         */
+        if (local->fop_lock_state != AFR_FOP_LOCK_PARALLEL) {
+                afr_fop_lock_unwind (frame, local->op, local->op_ret,
+                                     local->op_errno, local->xdata_rsp);
+                goto out;
+        }
+        /* At least one child is up */
+        /*
+         * Non-blocking locks also need to be serialized. Otherwise there is
+         * a chance that both the mounts which issued the same non-blocking
+         * inodelk may end up not acquiring the lock on any brick.
+         * Ex: Mount1 and Mount2 request a full-length lock on file f1.
+         * Mount1's afr may acquire the lock on brick-1 but not on brick-2
+         * because Mount2 already got the lock on brick-2, and vice versa.
+         * Since both mounts only got partial locks, afr treats them as
+         * failures in gaining the locks and unwinds with EAGAIN errno.
+         */
+        /*
+         * Reset the result before the serialized retry. op_errno has to be
+         * overwritten too: afr_fop_lock_done() tests for op_ret == -1 with
+         * op_errno == EAGAIN, so the EAGAIN left by the parallel attempt
+         * must not survive into the retry.
+         */
+        local->op_ret = -1;
+        local->op_errno = EUCLEAN;
+        local->fop_lock_state = AFR_FOP_LOCK_SERIAL;
+        afr_local_replies_wipe (local, priv);
+        if (local->xdata_rsp)
+                dict_unref (local->xdata_rsp);
+        local->xdata_rsp = NULL;
+        /*
+         * The unlock round rewrote cmd/flock/xdata; restore the caller's
+         * original request (in_cmd, in_flock, xdata_req) for the retry.
+         */
+        switch (local->op) {
+        case GF_FOP_INODELK:
+        case GF_FOP_FINODELK:
+                local->cont.inodelk.cmd = local->cont.inodelk.in_cmd;
+                local->cont.inodelk.flock = local->cont.inodelk.in_flock;
+                if (local->cont.inodelk.xdata)
+                        dict_unref (local->cont.inodelk.xdata);
+                local->cont.inodelk.xdata = NULL;
+                if (local->xdata_req)
+                        local->cont.inodelk.xdata = dict_ref (local->xdata_req);
+                break;
+        case GF_FOP_ENTRYLK:
+        case GF_FOP_FENTRYLK:
+                local->cont.entrylk.cmd = local->cont.entrylk.in_cmd;
+                if (local->cont.entrylk.xdata)
+                        dict_unref (local->cont.entrylk.xdata);
+                local->cont.entrylk.xdata = NULL;
+                if (local->xdata_req)
+                        local->cont.entrylk.xdata = dict_ref (local->xdata_req);
+                break;
+        default:
+                break;
+        }
+        afr_serialized_lock_wind (frame, this);
+out:
return 0;
}
-int32_t
-afr_unlock_inodelks_and_unwind (call_frame_t *frame, xlator_t *this,
- int call_count)
+static int32_t
+afr_unlock_locks_and_proceed (call_frame_t *frame, xlator_t *this,
+ int call_count)
{
int i = 0;
afr_private_t *priv = NULL;
@@ -3288,7 +3464,25 @@ afr_unlock_inodelks_and_unwind (call_frame_t *frame, xlator_t *this,
local = frame->local;
priv = this->private;
local->call_count = call_count;
- local->cont.inodelk.flock.l_type = F_UNLCK;
+ switch (local->op) {
+ case GF_FOP_INODELK:
+ case GF_FOP_FINODELK:
+ local->cont.inodelk.flock.l_type = F_UNLCK;
+ local->cont.inodelk.cmd = F_SETLK;
+ if (local->cont.inodelk.xdata)
+ dict_unref (local->cont.inodelk.xdata);
+ local->cont.inodelk.xdata = NULL;
+ break;
+ case GF_FOP_ENTRYLK:
+ case GF_FOP_FENTRYLK:
+ local->cont.entrylk.cmd = ENTRYLK_UNLOCK;
+ if (local->cont.entrylk.xdata)
+ dict_unref (local->cont.entrylk.xdata);
+ local->cont.entrylk.xdata = NULL;
+ break;
+ default:
+ break;
+ }
for (i = 0; i < priv->child_count; i++) {
if (!local->replies[i].valid)
@@ -3297,13 +3491,7 @@ afr_unlock_inodelks_and_unwind (call_frame_t *frame, xlator_t *this,
if (local->replies[i].op_ret == -1)
continue;
- STACK_WIND_COOKIE (frame, afr_unlock_partial_inodelk_cbk,
- (void*) (long) i,
- priv->children[i],
- priv->children[i]->fops->inodelk,
- local->cont.inodelk.volume,
- &local->loc, local->cont.inodelk.cmd,
- &local->cont.inodelk.flock, 0);
+ afr_fop_lock_wind (frame, this, i, afr_unlock_partial_lock_cbk);
if (!--call_count)
break;
@@ -3313,23 +3501,27 @@ afr_unlock_inodelks_and_unwind (call_frame_t *frame, xlator_t *this,
}
int32_t
-afr_inodelk_done (call_frame_t *frame, xlator_t *this)
+afr_fop_lock_done (call_frame_t *frame, xlator_t *this)
{
int i = 0;
int lock_count = 0;
+ unsigned char *success = NULL;
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
local = frame->local;
priv = this->private;
+ success = alloca0(priv->child_count);
for (i = 0; i < priv->child_count; i++) {
if (!local->replies[i].valid)
continue;
- if (local->replies[i].op_ret == 0)
+ if (local->replies[i].op_ret == 0) {
lock_count++;
+ success[i] = 1;
+ }
if (local->op_ret == -1 && local->op_errno == EAGAIN)
continue;
@@ -3347,20 +3539,29 @@ afr_inodelk_done (call_frame_t *frame, xlator_t *this)
local->op_errno = local->replies[i].op_errno;
}
- if (lock_count && local->cont.inodelk.flock.l_type != F_UNLCK &&
- (local->op_ret == -1 && local->op_errno == EAGAIN)) {
- afr_unlock_inodelks_and_unwind (frame, this,
- lock_count);
+ if (afr_fop_lock_is_unlock (frame) || (lock_count == 0))
+ goto unwind;
+
+ if (afr_is_conflicting_lock_present (local->op_ret, local->op_errno)) {
+ afr_unlock_locks_and_proceed (frame, this, lock_count);
+ } else if (priv->quorum_count && !afr_has_quorum (success, this)) {
+ local->fop_lock_state = AFR_FOP_LOCK_QUORUM_FAILED;
+ local->op_ret = -1;
+ local->op_errno = afr_quorum_errno (priv);
+ afr_unlock_locks_and_proceed (frame, this, lock_count);
} else {
- AFR_STACK_UNWIND (inodelk, frame, local->op_ret,
- local->op_errno, local->xdata_rsp);
+ goto unwind;
}
return 0;
+unwind:
+ afr_fop_lock_unwind (frame, local->op, local->op_ret,
+ local->op_errno, local->xdata_rsp);
+ return 0;
}
-int
-afr_common_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+static int
+afr_common_lock_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
afr_local_t *local = NULL;
@@ -3384,32 +3585,8 @@ afr_common_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
}
static int32_t
-afr_parallel_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
-
-{
- int call_count = 0;
-
- afr_common_inodelk_cbk (frame, cookie, this, op_ret, op_errno, xdata);
-
- call_count = afr_frame_return (frame);
- if (call_count == 0)
- afr_inodelk_done (frame, this);
-
- return 0;
-}
-
-static gf_boolean_t
-afr_is_conflicting_lock_present (int32_t op_ret, int32_t op_errno)
-{
- if (op_ret == -1 && op_errno == EAGAIN)
- return _gf_true;
- return _gf_false;
-}
-
-static int32_t
-afr_serialized_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
+afr_serialized_lock_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
afr_local_t *local = NULL;
@@ -3420,7 +3597,7 @@ afr_serialized_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local = frame->local;
priv = this->private;
- afr_common_inodelk_cbk (frame, cookie, this, op_ret, op_errno, xdata);
+ afr_common_lock_cbk (frame, cookie, this, op_ret, op_errno, xdata);
for (next_child = child_index + 1; next_child < priv->child_count;
next_child++) {
@@ -3430,80 +3607,117 @@ afr_serialized_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
if (afr_is_conflicting_lock_present (op_ret, op_errno) ||
(next_child == priv->child_count)) {
- afr_inodelk_done (frame, this);
+ afr_fop_lock_done (frame, this);
} else {
- STACK_WIND_COOKIE (frame, afr_serialized_inodelk_cbk,
- (void *) (long) next_child,
- priv->children[next_child],
- priv->children[next_child]->fops->inodelk,
- (const char *)local->cont.inodelk.volume,
- &local->loc, local->cont.inodelk.cmd,
- &local->cont.inodelk.flock,
- local->xdata_req);
+ afr_fop_lock_wind (frame, this, next_child,
+ afr_serialized_lock_cbk);
}
return 0;
}
static int
-afr_parallel_inodelk_wind (call_frame_t *frame, xlator_t *this)
+afr_serialized_lock_wind (call_frame_t *frame, xlator_t *this)
{
afr_private_t *priv = NULL;
afr_local_t *local = NULL;
- int call_count = 0;
int i = 0;
priv = this->private;
local = frame->local;
- call_count = local->call_count;
for (i = 0; i < priv->child_count; i++) {
- if (!local->child_up[i])
- continue;
- STACK_WIND_COOKIE (frame, afr_parallel_inodelk_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->inodelk,
- (const char *)local->cont.inodelk.volume,
- &local->loc, local->cont.inodelk.cmd,
- &local->cont.inodelk.flock,
- local->xdata_req);
- if (!--call_count)
+ if (local->child_up[i]) {
+ afr_fop_lock_wind (frame, this, i,
+ afr_serialized_lock_cbk);
break;
+ }
}
return 0;
}
+/*
+ * Per-child callback of the parallel lock attempt: pass the reply to
+ * afr_common_lock_cbk() and, when afr_frame_return() reports that no
+ * replies are outstanding, continue in afr_fop_lock_done().
+ */
+static int32_t
+afr_parallel_lock_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                       int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+        afr_common_lock_cbk (frame, cookie, this, op_ret, op_errno, xdata);
+
+        if (afr_frame_return (frame) == 0)
+                afr_fop_lock_done (frame, this);
+
+        return 0;
+}
+
static int
-afr_serialized_inodelk_wind (call_frame_t *frame, xlator_t *this)
+afr_parallel_lock_wind (call_frame_t *frame, xlator_t *this)
{
afr_private_t *priv = NULL;
afr_local_t *local = NULL;
+ int call_count = 0;
int i = 0;
priv = this->private;
local = frame->local;
+ call_count = local->call_count;
for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND_COOKIE (frame, afr_serialized_inodelk_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->inodelk,
- (const char *)local->cont.inodelk.volume,
- &local->loc, local->cont.inodelk.cmd,
- &local->cont.inodelk.flock,
- local->xdata_req);
+ if (!local->child_up[i])
+ continue;
+ afr_fop_lock_wind (frame, this, i, afr_parallel_lock_cbk);
+ if (!--call_count)
+ break;
+ }
+ return 0;
+}
+
+static int
+afr_fop_handle_lock (call_frame_t *frame, xlator_t *this)
+{ /* Common start of [f]inodelk/[f]entrylk: prepares local->cont and starts the parallel attempt; always returns 0. */
+ afr_local_t *local = frame->local;
+
+ if (!afr_fop_lock_is_unlock (frame)) { /* lock requests: override the wound cmd; in_cmd keeps the caller's value */
+ switch (local->op) {
+ case GF_FOP_INODELK:
+ case GF_FOP_FINODELK:
+ local->cont.inodelk.cmd = F_SETLK; /* first attempt uses F_SETLK even if the caller asked for F_SETLKW */
+ break;
+ case GF_FOP_ENTRYLK:
+ case GF_FOP_FENTRYLK:
+ local->cont.entrylk.cmd = ENTRYLK_LOCK_NB; /* presumably the non-blocking entrylk command -- confirm against entrylk_cmd */
+ break;
+ default:
break;
}
}
+
+ if (local->xdata_req) { /* take a separate reference on the request xdata for the per-fop xdata slot */
+ switch (local->op) {
+ case GF_FOP_INODELK:
+ case GF_FOP_FINODELK:
+ local->cont.inodelk.xdata = dict_ref (local->xdata_req);
+ break;
+ case GF_FOP_ENTRYLK:
+ case GF_FOP_FENTRYLK:
+ local->cont.entrylk.xdata = dict_ref (local->xdata_req);
+ break;
+ default:
+ break;
+ }
+ }
+
+ local->fop_lock_state = AFR_FOP_LOCK_PARALLEL; /* afr_unlock_partial_lock_cbk() checks this state before retrying serially */
+ afr_parallel_lock_wind (frame, this);
return 0;
}
-int32_t
-afr_inodelk (call_frame_t *frame, xlator_t *this,
- const char *volume, loc_t *loc, int32_t cmd,
- struct gf_flock *flock, dict_t *xdata)
+static int32_t
+afr_handle_inodelk (call_frame_t *frame, glusterfs_fop_t fop,
+ const char *volume, loc_t *loc, fd_t *fd, int32_t cmd,
+ struct gf_flock *flock, dict_t *xdata)
{
afr_local_t *local = NULL;
int32_t op_errno = ENOMEM;
@@ -3512,259 +3726,113 @@ afr_inodelk (call_frame_t *frame, xlator_t *this,
if (!local)
goto out;
- loc_copy (&local->loc, loc);
+ local->op = fop;
+ if (loc)
+ loc_copy (&local->loc, loc);
+ if (fd)
+ local->fd = fd_ref (fd);
+
local->cont.inodelk.volume = gf_strdup (volume);
if (!local->cont.inodelk.volume) {
op_errno = ENOMEM;
goto out;
}
+ local->cont.inodelk.in_cmd = cmd;
local->cont.inodelk.cmd = cmd;
+ local->cont.inodelk.in_flock = *flock;
local->cont.inodelk.flock = *flock;
if (xdata)
local->xdata_req = dict_ref (xdata);
- /* At least one child is up */
- /*
- * Non-blocking locks also need to be serialized. Otherwise there is
- * a chance that both the mounts which issued same non-blocking inodelk
- * may endup not acquiring the lock on any-brick.
- * Ex: Mount1 and Mount2
- * request for full length lock on file f1. Mount1 afr may acquire the
- * partial lock on brick-1 and may not acquire the lock on brick-2
- * because Mount2 already got the lock on brick-2, vice versa. Since
- * both the mounts only got partial locks, afr treats them as failure in
- * gaining the locks and unwinds with EAGAIN errno.
- */
- if (flock->l_type == F_UNLCK) {
- afr_parallel_inodelk_wind (frame, this);
- } else {
- afr_serialized_inodelk_wind (frame, this);
- }
-
- return 0;
+ op_errno = -afr_fop_handle_lock (frame, frame->this);
+ if (op_errno)
+ goto out;
+ return 0;
out:
- AFR_STACK_UNWIND (inodelk, frame, -1, op_errno, NULL);
+ afr_fop_lock_unwind (frame, fop, -1, op_errno, NULL);
return 0;
}
-
int32_t
-afr_finodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
-
+afr_inodelk (call_frame_t *frame, xlator_t *this,
+ const char *volume, loc_t *loc, int32_t cmd,
+ struct gf_flock *flock, dict_t *xdata)
{
- afr_local_t *local = NULL;
- int call_count = -1;
-
- local = frame->local;
-
- LOCK (&frame->lock);
- {
- if (op_ret == 0)
- local->op_ret = 0;
-
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0)
- AFR_STACK_UNWIND (finodelk, frame, local->op_ret,
- local->op_errno, xdata);
-
+ afr_handle_inodelk (frame, GF_FOP_INODELK, volume, loc, NULL, cmd,
+ flock, xdata);
return 0;
}
-
int32_t
afr_finodelk (call_frame_t *frame, xlator_t *this, const char *volume, fd_t *fd,
- int32_t cmd, struct gf_flock *flock, dict_t *xdata)
-{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- int i = 0;
- int32_t call_count = 0;
- int32_t op_errno = ENOMEM;
-
- priv = this->private;
-
- local = AFR_FRAME_INIT (frame, op_errno);
- if (!local)
- goto out;
-
- call_count = local->call_count;
- if (!call_count) {
- op_errno = ENOTCONN;
- goto out;
- }
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND (frame, afr_finodelk_cbk,
- priv->children[i],
- priv->children[i]->fops->finodelk,
- volume, fd, cmd, flock, xdata);
-
- if (!--call_count)
- break;
- }
- }
-
- return 0;
-out:
- AFR_STACK_UNWIND (finodelk, frame, -1, op_errno, NULL);
-
- return 0;
-}
-
-
-int32_t
-afr_entrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
+ int32_t cmd, struct gf_flock *flock, dict_t *xdata)
{
- afr_local_t *local = NULL;
- int call_count = -1;
-
- local = frame->local;
-
- LOCK (&frame->lock);
- {
- if (op_ret == 0)
- local->op_ret = 0;
-
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0)
- AFR_STACK_UNWIND (entrylk, frame, local->op_ret,
- local->op_errno, xdata);
-
+ afr_handle_inodelk (frame, GF_FOP_FINODELK, volume, NULL, fd, cmd,
+ flock, xdata);
return 0;
}
-
-int
-afr_entrylk (call_frame_t *frame, xlator_t *this, const char *volume,
- loc_t *loc, const char *basename, entrylk_cmd cmd,
- entrylk_type type, dict_t *xdata)
+static int
+afr_handle_entrylk (call_frame_t *frame, glusterfs_fop_t fop,
+ const char *volume, loc_t *loc, fd_t *fd,
+ const char *basename, entrylk_cmd cmd,
+ entrylk_type type, dict_t *xdata)
{
- afr_private_t *priv = NULL;
afr_local_t *local = NULL;
- int i = 0;
- int32_t call_count = 0;
- int32_t op_errno = 0;
-
- priv = this->private;
-
- local = AFR_FRAME_INIT (frame, op_errno);
- if (!local)
- goto out;
-
- call_count = local->call_count;
- if (!call_count) {
- op_errno = ENOTCONN;
- goto out;
- }
+ int32_t op_errno = ENOMEM;
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND (frame, afr_entrylk_cbk,
- priv->children[i],
- priv->children[i]->fops->entrylk,
- volume, loc, basename, cmd, type, xdata);
+ local = AFR_FRAME_INIT (frame, op_errno);
+ if (!local)
+ goto out;
- if (!--call_count)
- break;
- }
+        local->op = fop;
+        if (loc)
+                loc_copy (&local->loc, loc);
+        if (fd)
+                local->fd = fd_ref (fd);
+        /* Keep both the caller's command (in_cmd) and the copy that is
+         * rewritten while winding (cmd). */
+        local->cont.entrylk.cmd = cmd;
+        local->cont.entrylk.in_cmd = cmd;
+        local->cont.entrylk.type = type;
+        local->cont.entrylk.volume = gf_strdup (volume);
+        if (!local->cont.entrylk.volume) {
+                op_errno = ENOMEM;
+                goto out;
+        }
+        /*
+         * The previous implementation forwarded basename to the children
+         * untouched, so a NULL basename was accepted. Duplicate it only
+         * when present instead of reporting ENOMEM (or dereferencing NULL
+         * inside gf_strdup) for a NULL basename.
+         * NOTE(review): assumes local->cont.entrylk.basename starts out
+         * NULL, as the unconditional GF_FREE in afr_local_cleanup implies.
+         */
+        if (basename) {
+                local->cont.entrylk.basename = gf_strdup (basename);
+                if (!local->cont.entrylk.basename) {
+                        op_errno = ENOMEM;
+                        goto out;
+                }
+        }
+        if (xdata)
+                local->xdata_req = dict_ref (xdata);
+        /* afr_fop_handle_lock() returns 0 or a negative errno. */
+        op_errno = -afr_fop_handle_lock (frame, frame->this);
+        if (op_errno)
+                goto out;
- return 0;
+ return 0;
out:
- AFR_STACK_UNWIND (entrylk, frame, -1, op_errno, NULL);
-
+ afr_fop_lock_unwind (frame, fop, -1, op_errno, NULL);
return 0;
}
-
-
int
-afr_fentrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
-
+afr_entrylk (call_frame_t *frame, xlator_t *this, const char *volume,
+ loc_t *loc, const char *basename, entrylk_cmd cmd,
+ entrylk_type type, dict_t *xdata)
{
- afr_local_t *local = NULL;
- int call_count = -1;
-
- local = frame->local;
-
- LOCK (&frame->lock);
- {
- if (op_ret == 0)
- local->op_ret = 0;
-
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0)
- AFR_STACK_UNWIND (fentrylk, frame, local->op_ret,
- local->op_errno, xdata);
-
+ afr_handle_entrylk (frame, GF_FOP_ENTRYLK, volume, loc, NULL, basename,
+ cmd, type, xdata);
return 0;
}
-
int
afr_fentrylk (call_frame_t *frame, xlator_t *this, const char *volume, fd_t *fd,
const char *basename, entrylk_cmd cmd, entrylk_type type,
- dict_t *xdata)
+ dict_t *xdata)
{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- int i = 0;
- int32_t call_count = 0;
- int32_t op_errno = ENOMEM;
-
- priv = this->private;
-
- local = AFR_FRAME_INIT (frame, op_errno);
- if (!local)
- goto out;
-
- call_count = local->call_count;
- if (!call_count) {
- op_errno = ENOTCONN;
- goto out;
- }
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND (frame, afr_fentrylk_cbk,
- priv->children[i],
- priv->children[i]->fops->fentrylk,
- volume, fd, basename, cmd, type, xdata);
-
- if (!--call_count)
- break;
- }
- }
-
- return 0;
-out:
- AFR_STACK_UNWIND (fentrylk, frame, -1, op_errno, NULL);
-
+ afr_handle_entrylk (frame, GF_FOP_FENTRYLK, volume, NULL, fd, basename,
+ cmd, type, xdata);
return 0;
}
-
int
afr_statfs_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
int op_errno, struct statvfs *statvfs, dict_t *xdata)
diff --git a/xlators/cluster/afr/src/afr-lk-common.c b/xlators/cluster/afr/src/afr-lk-common.c
index 0bd9ffe..6019454 100644
--- a/xlators/cluster/afr/src/afr-lk-common.c
+++ b/xlators/cluster/afr/src/afr-lk-common.c
@@ -669,7 +669,7 @@ afr_unlock_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
if (op_ret < 0 && op_errno != ENOTCONN && op_errno != EBADFD) {
gf_msg (this->name, GF_LOG_ERROR, op_errno,
- AFR_MSG_INODE_UNLOCK_FAIL,
+ AFR_MSG_UNLOCK_FAIL,
"path=%s gfid=%s: unlock failed on subvolume %s "
"with lock owner %s", local->loc.path,
loc_gfid_utoa (&(local->loc)),
diff --git a/xlators/cluster/afr/src/afr-messages.h b/xlators/cluster/afr/src/afr-messages.h
index c7af18d..00e689b 100644
--- a/xlators/cluster/afr/src/afr-messages.h
+++ b/xlators/cluster/afr/src/afr-messages.h
@@ -130,11 +130,11 @@
/*!
* @messageid 108010
- * @diagnosis Inode unlocks failed on a brick.
+ * @diagnosis Unlocks failed on a brick.
* @recommendedaction Error number in the log should give the reason why it
* failed. Also observe brick logs for more information.
*/
-#define AFR_MSG_INODE_UNLOCK_FAIL (GLFS_COMP_BASE_AFR + 10)
+#define AFR_MSG_UNLOCK_FAIL (GLFS_COMP_BASE_AFR + 10)
/*!
* @messageid 108011
diff --git a/xlators/cluster/afr/src/afr-transaction.c b/xlators/cluster/afr/src/afr-transaction.c
index bff8226..1ed327c 100644
--- a/xlators/cluster/afr/src/afr-transaction.c
+++ b/xlators/cluster/afr/src/afr-transaction.c
@@ -136,14 +136,6 @@ afr_needs_changelog_update (afr_local_t *local)
return _gf_false;
}
-static int32_t
-afr_quorum_errno (afr_private_t *priv)
-{
- if (priv->quorum_reads)
- return ENOTCONN;
- return EROFS;
-}
-
int
__afr_txn_write_fop (call_frame_t *frame, xlator_t *this)
{
diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h
index 17b997e..a9cab13 100644
--- a/xlators/cluster/afr/src/afr.h
+++ b/xlators/cluster/afr/src/afr.h
@@ -348,6 +348,11 @@ typedef struct {
int readdir_subvol;
} afr_fd_ctx_t;
+typedef enum { /* phase of a [f]inodelk/[f]entrylk fop, kept in afr_local_t.fop_lock_state */
+ AFR_FOP_LOCK_PARALLEL, /* set by afr_fop_handle_lock() before winding to all up children at once */
+ AFR_FOP_LOCK_SERIAL, /* set by afr_unlock_partial_lock_cbk() before retrying one child at a time */
+ AFR_FOP_LOCK_QUORUM_FAILED, /* set by afr_fop_lock_done() when the successful locks do not meet quorum */
+} afr_fop_lock_state_t;
typedef struct _afr_local {
glusterfs_fop_t op;
@@ -665,10 +670,22 @@ typedef struct _afr_local {
struct {
char *volume;
int32_t cmd;
+ int32_t in_cmd;
+ struct gf_flock in_flock;
struct gf_flock flock;
+ void *xdata;
} inodelk;
struct {
+ char *volume;
+ char *basename;
+ entrylk_cmd in_cmd;
+ entrylk_cmd cmd;
+ entrylk_type type;
+ void *xdata;
+ } entrylk;
+
+ struct {
off_t offset;
gf_seek_what_t what;
} seek;
@@ -788,6 +805,7 @@ typedef struct _afr_local {
gf_boolean_t need_full_crawl;
gf_boolean_t compound;
+ afr_fop_lock_state_t fop_lock_state;
} afr_local_t;
@@ -1200,4 +1218,7 @@ void
afr_compound_cleanup (compound_args_t *args, dict_t *xdata,
dict_t *newloc_xdata);
+int32_t
+afr_quorum_errno (afr_private_t *priv);
+
#endif /* __AFR_H__ */
--
2.9.3