From fd75683b1e1334d60fbbca97f7d35dd50d61b404 Mon Sep 17 00:00:00 2001
From: Pranith Kumar K <pkarampu@redhat.com>
Date: Mon, 7 Nov 2016 14:47:34 +0530
Subject: [PATCH 195/206] cluster/afr: Fix bugs in [f]inodelk/[f]entrylk

Problems:
1) Inodelk is not taking quorum into account
2) finodelk, [f]entrylk are not implemented correctly
3) By default afr doesn't go for non-blocking parallel locks.

Fix:
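Implemented a common framework which is used by [f]inodelk/[f]entrylk and
which takes quorum into account.  Locks are first attempted as non-blocking
parallel locks; on a conflict they are released and retried serially with
the caller's original command.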

 >Change-Id: I239f13875a065298630d266941df10cfa3addc85
 >BUG: 1369077
 >Signed-off-by: Pranith Kumar K <pkarampu@redhat.com>
 >Reviewed-on: http://review.gluster.org/15802
 >Tested-by: Krutika Dhananjay <kdhananj@redhat.com>
 >Reviewed-by: Krutika Dhananjay <kdhananj@redhat.com>
 >Smoke: Gluster Build System <jenkins@build.gluster.org>
 >Reviewed-by: Ravishankar N <ravishankar@redhat.com>
 >CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
 >NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>

BUG: 1393694
Change-Id: If36907dad803b4774372036a54f8a034cd4155f5
Signed-off-by: Pranith Kumar K <pkarampu@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/91346
---
 tests/basic/afr/inodelk.t                 |  87 ++++
 xlators/cluster/afr/src/afr-common.c      | 676 ++++++++++++++++--------------
 xlators/cluster/afr/src/afr-lk-common.c   |   2 +-
 xlators/cluster/afr/src/afr-messages.h    |   4 +-
 xlators/cluster/afr/src/afr-transaction.c |   8 -
 xlators/cluster/afr/src/afr.h             |  21 +
 6 files changed, 483 insertions(+), 315 deletions(-)
 create mode 100644 tests/basic/afr/inodelk.t

diff --git a/tests/basic/afr/inodelk.t b/tests/basic/afr/inodelk.t
new file mode 100644
index 0000000..a32aa85
--- /dev/null
+++ b/tests/basic/afr/inodelk.t
@@ -0,0 +1,87 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+cleanup;
+
+#This test tests that inodelk fails when quorum is not met. Also tests the
+#success case where inodelk is obtained and unlocks are done correctly.
+
+TEST glusterd;
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 replica 3 arbiter 1 $H0:$B0/${V0}{0..5}
+TEST $CLI volume start $V0
+TEST $GFS -s $H0 --volfile-id=$V0 $M0
+
+#Test success case
+TEST mkdir $M0/dir1
+TEST mv $M0/dir1 $M0/dir2
+
+#If there is a problem with inodelk unlocking the following would hang.
+TEST mv $M0/dir2 $M0/dir1
+
+#Test failure case by bringing two of the bricks down
+#Test that the directory is not moved partially on some bricks but successful
+#on other subvol where quorum meets. Do that for both set of bricks
+
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST kill_brick $V0 $H0 $B0/${V0}1
+TEST ! mv $M0/dir1 $M0/dir2
+
+TEST stat $B0/${V0}0/dir1
+TEST stat $B0/${V0}1/dir1
+TEST stat $B0/${V0}2/dir1
+TEST stat $B0/${V0}3/dir1
+TEST stat $B0/${V0}4/dir1
+TEST stat $B0/${V0}5/dir1
+TEST ! stat $B0/${V0}0/dir2
+TEST ! stat $B0/${V0}1/dir2
+TEST ! stat $B0/${V0}2/dir2
+TEST ! stat $B0/${V0}3/dir2
+TEST ! stat $B0/${V0}4/dir2
+TEST ! stat $B0/${V0}5/dir2
+
+TEST $CLI volume start $V0 force
+TEST kill_brick $V0 $H0 $B0/${V0}3
+TEST kill_brick $V0 $H0 $B0/${V0}4
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+TEST ! mv $M0/dir1 $M0/dir2
+TEST stat $B0/${V0}0/dir1
+TEST stat $B0/${V0}1/dir1
+TEST stat $B0/${V0}2/dir1
+TEST stat $B0/${V0}3/dir1
+TEST stat $B0/${V0}4/dir1
+TEST stat $B0/${V0}5/dir1
+TEST ! stat $B0/${V0}0/dir2
+TEST ! stat $B0/${V0}1/dir2
+TEST ! stat $B0/${V0}2/dir2
+TEST ! stat $B0/${V0}3/dir2
+TEST ! stat $B0/${V0}4/dir2
+TEST ! stat $B0/${V0}5/dir2
+
+#Bring the bricks back up and try mv once more, it should succeed.
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 3
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 4
+TEST mv $M0/dir1 $M0/dir2
+cleanup;
+#Do similar tests on replica 2
+TEST glusterd;
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0..3}
+TEST $CLI volume start $V0
+TEST $GFS -s $H0 --volfile-id=$V0 $M0
+TEST mkdir $M0/dir1
+TEST mv $M0/dir1 $M0/dir2
+#Because we don't know hashed subvol, do the same test twice bringing 1 brick
+#from each down, quorum calculation should allow it.
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST mv $M0/dir2 $M0/dir1
+TEST $CLI volume start $V0 force
+TEST kill_brick $V0 $H0 $B0/${V0}2
+TEST mv $M0/dir1 $M0/dir2
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST mv $M0/dir2 $M0/dir1
+cleanup
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
index dec6026..ab60406 100644
--- a/xlators/cluster/afr/src/afr-common.c
+++ b/xlators/cluster/afr/src/afr-common.c
@@ -45,6 +45,14 @@
 #include "afr-messages.h"
 #include "compound-fop-utils.h"
 
+int32_t
+afr_quorum_errno (afr_private_t *priv)
+{
+        if (priv->quorum_reads)
+                return ENOTCONN;
+        return EROFS;
+}
+
 call_frame_t *
 afr_copy_frame (call_frame_t *base)
 {
@@ -1558,6 +1566,29 @@ afr_remove_eager_lock_stub (afr_local_t *local)
         UNLOCK (&local->fd->lock);
 }
 
+static gf_boolean_t
+afr_fop_lock_is_unlock (call_frame_t *frame)
+{
+        afr_local_t *local = frame->local;
+        switch (local->op) {
+        case GF_FOP_INODELK:
+        case GF_FOP_FINODELK:
+                if ((F_UNLCK == local->cont.inodelk.in_flock.l_type) &&
+                   (local->cont.inodelk.in_cmd == F_SETLKW ||
+                    local->cont.inodelk.in_cmd == F_SETLK))
+                        return _gf_true;
+                break;
+        case GF_FOP_ENTRYLK:
+        case GF_FOP_FENTRYLK:
+                if (ENTRYLK_UNLOCK == local->cont.entrylk.in_cmd)
+                                 return _gf_true;
+                 break;
+        default:
+                 break;
+        }
+        return _gf_false;
+}
+
 void
 afr_local_cleanup (afr_local_t *local, xlator_t *this)
 {
@@ -1681,6 +1712,15 @@ afr_local_cleanup (afr_local_t *local, xlator_t *this)
 
         { /* inodelk */
                 GF_FREE (local->cont.inodelk.volume);
+                if (local->cont.inodelk.xdata)
+                        dict_unref (local->cont.inodelk.xdata);
+        }
+
+        { /* entrylk */
+                GF_FREE (local->cont.entrylk.volume);
+                GF_FREE (local->cont.entrylk.basename);
+                if (local->cont.entrylk.xdata)
+                        dict_unref (local->cont.entrylk.xdata);
         }
 
         if (local->xdata_req)
@@ -3242,10 +3282,96 @@ out:
 
 /* }}} */
 
-int32_t
-afr_unlock_partial_inodelk_cbk (call_frame_t *frame, void *cookie,
-                                xlator_t *this, int32_t op_ret,
-                                int32_t op_errno, dict_t *xdata)
+static int
+afr_serialized_lock_wind (call_frame_t *frame, xlator_t *this);
+
+static gf_boolean_t
+afr_is_conflicting_lock_present (int32_t op_ret, int32_t op_errno)
+{
+        if (op_ret == -1 && op_errno == EAGAIN)
+                return _gf_true;
+        return _gf_false;
+}
+
+static void
+afr_fop_lock_unwind (call_frame_t *frame, glusterfs_fop_t op, int32_t op_ret,
+                     int32_t op_errno, dict_t *xdata)
+{
+        switch (op) {
+        case GF_FOP_INODELK:
+                AFR_STACK_UNWIND (inodelk, frame, op_ret, op_errno, xdata);
+                break;
+        case GF_FOP_FINODELK:
+                AFR_STACK_UNWIND (finodelk, frame, op_ret, op_errno, xdata);
+                break;
+        case GF_FOP_ENTRYLK:
+                AFR_STACK_UNWIND (entrylk, frame, op_ret, op_errno, xdata);
+                break;
+        case GF_FOP_FENTRYLK:
+                AFR_STACK_UNWIND (fentrylk, frame, op_ret, op_errno, xdata);
+                break;
+        default:
+                break;
+        }
+}
+
+static void
+afr_fop_lock_wind (call_frame_t *frame, xlator_t *this, int child_index,
+                   int32_t (*lock_cbk) (call_frame_t *, void *, xlator_t *,
+                                        int32_t, int32_t, dict_t *))
+{
+        afr_local_t *local = frame->local;
+        afr_private_t *priv = this->private;
+        int i = child_index;
+
+        switch (local->op) {
+        case GF_FOP_INODELK:
+                STACK_WIND_COOKIE (frame, lock_cbk, (void *) (long) i,
+                                   priv->children[i],
+                                   priv->children[i]->fops->inodelk,
+                                   (const char *)local->cont.inodelk.volume,
+                                   &local->loc, local->cont.inodelk.cmd,
+                                   &local->cont.inodelk.flock,
+                                   local->cont.inodelk.xdata);
+                break;
+        case GF_FOP_FINODELK:
+                STACK_WIND_COOKIE (frame, lock_cbk, (void *) (long) i,
+                                   priv->children[i],
+                                   priv->children[i]->fops->finodelk,
+                                   (const char *)local->cont.inodelk.volume,
+                                   local->fd, local->cont.inodelk.cmd,
+                                   &local->cont.inodelk.flock,
+                                   local->cont.inodelk.xdata);
+                break;
+        case GF_FOP_ENTRYLK:
+                STACK_WIND_COOKIE (frame, lock_cbk, (void *) (long) i,
+                                   priv->children[i],
+                                   priv->children[i]->fops->entrylk,
+                                   local->cont.entrylk.volume, &local->loc,
+                                   local->cont.entrylk.basename,
+                                   local->cont.entrylk.cmd,
+                                   local->cont.entrylk.type,
+                                   local->cont.entrylk.xdata);
+                break;
+        case GF_FOP_FENTRYLK:
+                STACK_WIND_COOKIE (frame, lock_cbk, (void *) (long) i,
+                                   priv->children[i],
+                                   priv->children[i]->fops->fentrylk,
+                                   local->cont.entrylk.volume, local->fd,
+                                   local->cont.entrylk.basename,
+                                   local->cont.entrylk.cmd,
+                                   local->cont.entrylk.type,
+                                   local->cont.entrylk.xdata);
+                break;
+        default:
+                break;
+        }
+}
+
+static int32_t
+afr_unlock_partial_lock_cbk (call_frame_t *frame, void *cookie,
+                             xlator_t *this, int32_t op_ret,
+                             int32_t op_errno, dict_t *xdata)
 
 {
         afr_local_t *local = NULL;
@@ -3258,28 +3384,78 @@ afr_unlock_partial_inodelk_cbk (call_frame_t *frame, void *cookie,
         priv = this->private;
 
         if (op_ret < 0 && op_errno != ENOTCONN) {
-                loc_gfid (&local->loc, gfid);
-                gf_msg (this->name, GF_LOG_ERROR, 0,
-                        AFR_MSG_INODE_UNLOCK_FAIL,
-                        "%s: Failed to unlock %s "
-                        "with lk_owner: %s (%s)", uuid_utoa (gfid),
+                if (local->fd)
+                        gf_uuid_copy (gfid, local->fd->inode->gfid);
+                else
+                        loc_gfid (&local->loc, gfid);
+                gf_msg (this->name, GF_LOG_ERROR, op_errno,
+                        AFR_MSG_UNLOCK_FAIL,
+                        "%s: Failed to unlock %s on %s "
+                        "with lk_owner: %s", uuid_utoa (gfid),
+                        gf_fop_list[local->op],
                         priv->children[child_index]->name,
-                        lkowner_utoa (&frame->root->lk_owner),
-                        strerror (op_errno));
+                        lkowner_utoa (&frame->root->lk_owner));
         }
 
         call_count = afr_frame_return (frame);
-        if (call_count == 0) {
-                AFR_STACK_UNWIND (inodelk, frame, local->op_ret,
-                                  local->op_errno, local->xdata_rsp);
-        }
+        if (call_count)
+                goto out;
 
+        if (local->fop_lock_state != AFR_FOP_LOCK_PARALLEL) {
+                afr_fop_lock_unwind (frame, local->op, local->op_ret,
+                                     local->op_errno, local->xdata_rsp);
+                goto out;
+        }
+        /* At least one child is up */
+        /*
+         * Non-blocking locks also need to be serialized.  Otherwise there is
+         * a chance that both the mounts which issued same non-blocking inodelk
+         * may end up not acquiring the lock on any brick.
+         * Ex: Mount1 and Mount2
+         * request for full length lock on file f1.  Mount1 afr may acquire the
+         * partial lock on brick-1 and may not acquire the lock on brick-2
+         * because Mount2 already got the lock on brick-2, vice versa.  Since
+         * both the mounts only got partial locks, afr treats them as failure in
+         * gaining the locks and unwinds with EAGAIN errno.
+         */
+        local->op_ret = -1;
+        local->op_errno = EUCLEAN;
+        local->fop_lock_state = AFR_FOP_LOCK_SERIAL;
+        afr_local_replies_wipe (local, priv);
+        if (local->xdata_rsp)
+                dict_unref (local->xdata_rsp);
+        local->xdata_rsp = NULL;
+        switch (local->op) {
+        case GF_FOP_INODELK:
+        case GF_FOP_FINODELK:
+                local->cont.inodelk.cmd = local->cont.inodelk.in_cmd;
+                local->cont.inodelk.flock = local->cont.inodelk.in_flock;
+                if (local->cont.inodelk.xdata)
+                        dict_unref (local->cont.inodelk.xdata);
+                local->cont.inodelk.xdata = NULL;
+                if (local->xdata_req)
+                        local->cont.inodelk.xdata = dict_ref (local->xdata_req);
+                break;
+        case GF_FOP_ENTRYLK:
+        case GF_FOP_FENTRYLK:
+                local->cont.entrylk.cmd = local->cont.entrylk.in_cmd;
+                if (local->cont.entrylk.xdata)
+                        dict_unref (local->cont.entrylk.xdata);
+                local->cont.entrylk.xdata = NULL;
+                if (local->xdata_req)
+                        local->cont.entrylk.xdata = dict_ref (local->xdata_req);
+                break;
+        default:
+                break;
+        }
+        afr_serialized_lock_wind (frame, this);
+out:
         return 0;
 }
 
-int32_t
-afr_unlock_inodelks_and_unwind (call_frame_t *frame, xlator_t *this,
-                                int call_count)
+static int32_t
+afr_unlock_locks_and_proceed (call_frame_t *frame, xlator_t *this,
+                             int call_count)
 {
         int i = 0;
         afr_private_t *priv = NULL;
@@ -3288,7 +3464,25 @@ afr_unlock_inodelks_and_unwind (call_frame_t *frame, xlator_t *this,
         local = frame->local;
         priv = this->private;
         local->call_count = call_count;
-        local->cont.inodelk.flock.l_type = F_UNLCK;
+        switch (local->op) {
+        case GF_FOP_INODELK:
+        case GF_FOP_FINODELK:
+                local->cont.inodelk.flock.l_type = F_UNLCK;
+                local->cont.inodelk.cmd = F_SETLK;
+                if (local->cont.inodelk.xdata)
+                        dict_unref (local->cont.inodelk.xdata);
+                local->cont.inodelk.xdata = NULL;
+                break;
+        case GF_FOP_ENTRYLK:
+        case GF_FOP_FENTRYLK:
+                local->cont.entrylk.cmd = ENTRYLK_UNLOCK;
+                if (local->cont.entrylk.xdata)
+                        dict_unref (local->cont.entrylk.xdata);
+                local->cont.entrylk.xdata = NULL;
+                break;
+        default:
+                break;
+        }
 
         for (i = 0; i < priv->child_count; i++) {
                 if (!local->replies[i].valid)
@@ -3297,13 +3491,7 @@ afr_unlock_inodelks_and_unwind (call_frame_t *frame, xlator_t *this,
                 if (local->replies[i].op_ret == -1)
                         continue;
 
-                STACK_WIND_COOKIE (frame, afr_unlock_partial_inodelk_cbk,
-                                   (void*) (long) i,
-                                   priv->children[i],
-                                   priv->children[i]->fops->inodelk,
-                                   local->cont.inodelk.volume,
-                                   &local->loc, local->cont.inodelk.cmd,
-                                   &local->cont.inodelk.flock, 0);
+                afr_fop_lock_wind (frame, this, i, afr_unlock_partial_lock_cbk);
 
                 if (!--call_count)
                         break;
@@ -3313,23 +3501,27 @@ afr_unlock_inodelks_and_unwind (call_frame_t *frame, xlator_t *this,
 }
 
 int32_t
-afr_inodelk_done (call_frame_t *frame, xlator_t *this)
+afr_fop_lock_done (call_frame_t *frame, xlator_t *this)
 {
         int i = 0;
         int lock_count = 0;
+        unsigned char *success = NULL;
 
         afr_local_t *local = NULL;
         afr_private_t *priv = NULL;
 
         local = frame->local;
         priv = this->private;
+        success = alloca0(priv->child_count);
 
         for (i = 0; i < priv->child_count; i++) {
                 if (!local->replies[i].valid)
                         continue;
 
-                if (local->replies[i].op_ret == 0)
+                if (local->replies[i].op_ret == 0) {
                         lock_count++;
+                        success[i] = 1;
+                }
 
                 if (local->op_ret == -1 && local->op_errno == EAGAIN)
                         continue;
@@ -3347,20 +3539,29 @@ afr_inodelk_done (call_frame_t *frame, xlator_t *this)
                 local->op_errno = local->replies[i].op_errno;
         }
 
-        if (lock_count && local->cont.inodelk.flock.l_type != F_UNLCK &&
-            (local->op_ret == -1 && local->op_errno == EAGAIN)) {
-                afr_unlock_inodelks_and_unwind (frame, this,
-                                                lock_count);
+        if (afr_fop_lock_is_unlock (frame) || (lock_count == 0))
+                goto unwind;
+
+        if (afr_is_conflicting_lock_present (local->op_ret, local->op_errno)) {
+                afr_unlock_locks_and_proceed (frame, this, lock_count);
+        } else if (priv->quorum_count && !afr_has_quorum (success, this)) {
+                local->fop_lock_state = AFR_FOP_LOCK_QUORUM_FAILED;
+                local->op_ret = -1;
+                local->op_errno = afr_quorum_errno (priv);
+                afr_unlock_locks_and_proceed (frame, this, lock_count);
         } else {
-                AFR_STACK_UNWIND (inodelk, frame, local->op_ret,
-                                  local->op_errno, local->xdata_rsp);
+                goto unwind;
         }
 
         return 0;
+unwind:
+        afr_fop_lock_unwind (frame, local->op, local->op_ret,
+                             local->op_errno, local->xdata_rsp);
+        return 0;
 }
 
-int
-afr_common_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+static int
+afr_common_lock_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
                         int32_t op_ret, int32_t op_errno, dict_t *xdata)
 {
         afr_local_t *local = NULL;
@@ -3384,32 +3585,8 @@ afr_common_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
 }
 
 static int32_t
-afr_parallel_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
-                          int32_t op_ret, int32_t op_errno, dict_t *xdata)
-
-{
-        int     call_count = 0;
-
-        afr_common_inodelk_cbk (frame, cookie, this, op_ret, op_errno, xdata);
-
-        call_count = afr_frame_return (frame);
-        if (call_count == 0)
-                afr_inodelk_done (frame, this);
-
-        return 0;
-}
-
-static gf_boolean_t
-afr_is_conflicting_lock_present (int32_t op_ret, int32_t op_errno)
-{
-        if (op_ret == -1 && op_errno == EAGAIN)
-                return _gf_true;
-        return _gf_false;
-}
-
-static int32_t
-afr_serialized_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
-		            int32_t op_ret, int32_t op_errno, dict_t *xdata)
+afr_serialized_lock_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                         int32_t op_ret, int32_t op_errno, dict_t *xdata)
 
 {
         afr_local_t *local = NULL;
@@ -3420,7 +3597,7 @@ afr_serialized_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
         local = frame->local;
         priv = this->private;
 
-        afr_common_inodelk_cbk (frame, cookie, this, op_ret, op_errno, xdata);
+        afr_common_lock_cbk (frame, cookie, this, op_ret, op_errno, xdata);
 
         for (next_child = child_index + 1; next_child < priv->child_count;
              next_child++) {
@@ -3430,80 +3607,117 @@ afr_serialized_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
 
         if (afr_is_conflicting_lock_present (op_ret, op_errno) ||
             (next_child == priv->child_count)) {
-                afr_inodelk_done (frame, this);
+                afr_fop_lock_done (frame, this);
         } else {
-                STACK_WIND_COOKIE (frame, afr_serialized_inodelk_cbk,
-                                   (void *) (long) next_child,
-                                   priv->children[next_child],
-                                   priv->children[next_child]->fops->inodelk,
-                                   (const char *)local->cont.inodelk.volume,
-                                   &local->loc, local->cont.inodelk.cmd,
-                                   &local->cont.inodelk.flock,
-                                   local->xdata_req);
+                afr_fop_lock_wind (frame, this, next_child,
+                                   afr_serialized_lock_cbk);
         }
 
         return 0;
 }
 
 static int
-afr_parallel_inodelk_wind (call_frame_t *frame, xlator_t *this)
+afr_serialized_lock_wind (call_frame_t *frame, xlator_t *this)
 {
         afr_private_t *priv = NULL;
         afr_local_t *local  = NULL;
-        int         call_count = 0;
         int i = 0;
 
         priv = this->private;
         local = frame->local;
-        call_count = local->call_count;
 
         for (i = 0; i < priv->child_count; i++) {
-                if (!local->child_up[i])
-                        continue;
-                STACK_WIND_COOKIE (frame, afr_parallel_inodelk_cbk,
-                                   (void *) (long) i,
-                                   priv->children[i],
-                                   priv->children[i]->fops->inodelk,
-                                   (const char *)local->cont.inodelk.volume,
-                                   &local->loc, local->cont.inodelk.cmd,
-                                   &local->cont.inodelk.flock,
-                                   local->xdata_req);
-                if (!--call_count)
+                if (local->child_up[i]) {
+                        afr_fop_lock_wind (frame, this, i,
+                                           afr_serialized_lock_cbk);
                         break;
+                }
         }
         return 0;
 }
 
+static int32_t
+afr_parallel_lock_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                       int32_t op_ret, int32_t op_errno, dict_t *xdata)
+
+{
+        int     call_count = 0;
+
+        afr_common_lock_cbk (frame, cookie, this, op_ret, op_errno, xdata);
+
+        call_count = afr_frame_return (frame);
+        if (call_count == 0)
+                afr_fop_lock_done (frame, this);
+
+        return 0;
+}
+
 static int
-afr_serialized_inodelk_wind (call_frame_t *frame, xlator_t *this)
+afr_parallel_lock_wind (call_frame_t *frame, xlator_t *this)
 {
         afr_private_t *priv = NULL;
         afr_local_t *local  = NULL;
+        int         call_count = 0;
         int i = 0;
 
         priv = this->private;
         local = frame->local;
+        call_count = local->call_count;
 
         for (i = 0; i < priv->child_count; i++) {
-                if (local->child_up[i]) {
-                        STACK_WIND_COOKIE (frame, afr_serialized_inodelk_cbk,
-                                           (void *) (long) i,
-                                           priv->children[i],
-                                           priv->children[i]->fops->inodelk,
-                                       (const char *)local->cont.inodelk.volume,
-                                           &local->loc, local->cont.inodelk.cmd,
-                                           &local->cont.inodelk.flock,
-                                           local->xdata_req);
+                if (!local->child_up[i])
+                        continue;
+                afr_fop_lock_wind (frame, this, i, afr_parallel_lock_cbk);
+                if (!--call_count)
+                        break;
+        }
+        return 0;
+}
+
+static int
+afr_fop_handle_lock (call_frame_t *frame, xlator_t *this)
+{
+        afr_local_t *local = frame->local;
+
+        if (!afr_fop_lock_is_unlock (frame)) {
+                switch (local->op) {
+                case GF_FOP_INODELK:
+                case GF_FOP_FINODELK:
+                        local->cont.inodelk.cmd = F_SETLK;
+                        break;
+                case GF_FOP_ENTRYLK:
+                case GF_FOP_FENTRYLK:
+                        local->cont.entrylk.cmd = ENTRYLK_LOCK_NB;
+                        break;
+                default:
                         break;
                 }
         }
+
+        if (local->xdata_req) {
+                switch (local->op) {
+                case GF_FOP_INODELK:
+                case GF_FOP_FINODELK:
+                        local->cont.inodelk.xdata = dict_ref (local->xdata_req);
+                        break;
+                case GF_FOP_ENTRYLK:
+                case GF_FOP_FENTRYLK:
+                        local->cont.entrylk.xdata = dict_ref (local->xdata_req);
+                        break;
+                default:
+                        break;
+                }
+        }
+
+        local->fop_lock_state = AFR_FOP_LOCK_PARALLEL;
+        afr_parallel_lock_wind (frame, this);
         return 0;
 }
 
-int32_t
-afr_inodelk (call_frame_t *frame, xlator_t *this,
-             const char *volume, loc_t *loc, int32_t cmd,
-             struct gf_flock *flock, dict_t *xdata)
+static int32_t
+afr_handle_inodelk (call_frame_t *frame, glusterfs_fop_t fop,
+                    const char *volume, loc_t *loc, fd_t *fd, int32_t cmd,
+                    struct gf_flock *flock, dict_t *xdata)
 {
         afr_local_t *local  = NULL;
         int32_t op_errno = ENOMEM;
@@ -3512,259 +3726,113 @@ afr_inodelk (call_frame_t *frame, xlator_t *this,
         if (!local)
                 goto out;
 
-        loc_copy (&local->loc, loc);
+        local->op = fop;
+        if (loc)
+                loc_copy (&local->loc, loc);
+        if (fd)
+                local->fd = fd_ref (fd);
+
         local->cont.inodelk.volume = gf_strdup (volume);
         if (!local->cont.inodelk.volume) {
                 op_errno = ENOMEM;
                 goto out;
         }
 
+        local->cont.inodelk.in_cmd = cmd;
         local->cont.inodelk.cmd = cmd;
+        local->cont.inodelk.in_flock = *flock;
         local->cont.inodelk.flock = *flock;
         if (xdata)
                 local->xdata_req = dict_ref (xdata);
 
-        /* At least one child is up */
-        /*
-         * Non-blocking locks also need to be serialized.  Otherwise there is
-         * a chance that both the mounts which issued same non-blocking inodelk
-         * may endup not acquiring the lock on any-brick.
-         * Ex: Mount1 and Mount2
-         * request for full length lock on file f1.  Mount1 afr may acquire the
-         * partial lock on brick-1 and may not acquire the lock on brick-2
-         * because Mount2 already got the lock on brick-2, vice versa.  Since
-         * both the mounts only got partial locks, afr treats them as failure in
-         * gaining the locks and unwinds with EAGAIN errno.
-         */
-        if (flock->l_type == F_UNLCK) {
-                afr_parallel_inodelk_wind (frame, this);
-        } else {
-                afr_serialized_inodelk_wind (frame, this);
-        }
-
-	return 0;
+        op_errno = -afr_fop_handle_lock (frame, frame->this);
+        if (op_errno)
+                goto out;
+        return 0;
 out:
-	AFR_STACK_UNWIND (inodelk, frame, -1, op_errno, NULL);
+        afr_fop_lock_unwind (frame, fop, -1, op_errno, NULL);
 
         return 0;
 }
 
-
 int32_t
-afr_finodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
-		  int32_t op_ret, int32_t op_errno, dict_t *xdata)
-
+afr_inodelk (call_frame_t *frame, xlator_t *this,
+             const char *volume, loc_t *loc, int32_t cmd,
+             struct gf_flock *flock, dict_t *xdata)
 {
-        afr_local_t *local = NULL;
-        int call_count = -1;
-
-        local = frame->local;
-
-        LOCK (&frame->lock);
-        {
-                if (op_ret == 0)
-                        local->op_ret = 0;
-
-                local->op_errno = op_errno;
-        }
-        UNLOCK (&frame->lock);
-
-        call_count = afr_frame_return (frame);
-
-        if (call_count == 0)
-                AFR_STACK_UNWIND (finodelk, frame, local->op_ret,
-                                  local->op_errno, xdata);
-
+        afr_handle_inodelk (frame, GF_FOP_INODELK, volume, loc, NULL, cmd,
+                            flock, xdata);
         return 0;
 }
 
-
 int32_t
 afr_finodelk (call_frame_t *frame, xlator_t *this, const char *volume, fd_t *fd,
-	      int32_t cmd, struct gf_flock *flock, dict_t *xdata)
-{
-        afr_private_t *priv = NULL;
-        afr_local_t *local  = NULL;
-        int i = 0;
-        int32_t call_count = 0;
-        int32_t op_errno = ENOMEM;
-
-        priv = this->private;
-
-	local = AFR_FRAME_INIT (frame, op_errno);
-	if (!local)
-		goto out;
-
-        call_count = local->call_count;
-	if (!call_count) {
-		op_errno = ENOTCONN;
-		goto out;
-	}
-
-        for (i = 0; i < priv->child_count; i++) {
-                if (local->child_up[i]) {
-                        STACK_WIND (frame, afr_finodelk_cbk,
-                                    priv->children[i],
-                                    priv->children[i]->fops->finodelk,
-                                    volume, fd, cmd, flock, xdata);
-
-                        if (!--call_count)
-                                break;
-                }
-        }
-
-	return 0;
-out:
-	AFR_STACK_UNWIND (finodelk, frame, -1, op_errno, NULL);
-
-        return 0;
-}
-
-
-int32_t
-afr_entrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
-                 int32_t op_ret, int32_t op_errno, dict_t *xdata)
+              int32_t cmd, struct gf_flock *flock, dict_t *xdata)
 {
-        afr_local_t *local = NULL;
-        int call_count = -1;
-
-        local = frame->local;
-
-        LOCK (&frame->lock);
-        {
-                if (op_ret == 0)
-                        local->op_ret = 0;
-
-                local->op_errno = op_errno;
-        }
-        UNLOCK (&frame->lock);
-
-        call_count = afr_frame_return (frame);
-
-        if (call_count == 0)
-                AFR_STACK_UNWIND (entrylk, frame, local->op_ret,
-                                  local->op_errno, xdata);
-
+        afr_handle_inodelk (frame, GF_FOP_FINODELK, volume, NULL, fd, cmd,
+                            flock, xdata);
         return 0;
 }
 
-
-int
-afr_entrylk (call_frame_t *frame, xlator_t *this, const char *volume,
-	     loc_t *loc, const char *basename, entrylk_cmd cmd,
-	     entrylk_type type, dict_t *xdata)
+static int
+afr_handle_entrylk (call_frame_t *frame, glusterfs_fop_t fop,
+                    const char *volume, loc_t *loc, fd_t *fd,
+                    const char *basename, entrylk_cmd cmd,
+                    entrylk_type type, dict_t *xdata)
 {
-        afr_private_t *priv = NULL;
         afr_local_t *local  = NULL;
-        int i = 0;
-        int32_t call_count = 0;
-        int32_t op_errno = 0;
-
-        priv = this->private;
-
-	local = AFR_FRAME_INIT (frame, op_errno);
-	if (!local)
-		goto out;
-
-        call_count = local->call_count;
-	if (!call_count) {
-		op_errno = ENOTCONN;
-		goto out;
-	}
+        int32_t op_errno = ENOMEM;
 
-        for (i = 0; i < priv->child_count; i++) {
-                if (local->child_up[i]) {
-                        STACK_WIND (frame, afr_entrylk_cbk,
-                                    priv->children[i],
-                                    priv->children[i]->fops->entrylk,
-                                    volume, loc, basename, cmd, type, xdata);
+        local = AFR_FRAME_INIT (frame, op_errno);
+        if (!local)
+                goto out;
 
-                        if (!--call_count)
-                                break;
-                }
+        local->op = fop;
+        if (loc)
+                loc_copy (&local->loc, loc);
+        if (fd)
+                local->fd = fd_ref (fd);
+        local->cont.entrylk.cmd = cmd;
+        local->cont.entrylk.in_cmd = cmd;
+        local->cont.entrylk.type = type;
+        local->cont.entrylk.volume = gf_strdup (volume);
+        local->cont.entrylk.basename = gf_strdup (basename);
+        if (!local->cont.entrylk.volume || !local->cont.entrylk.basename) {
+                op_errno = ENOMEM;
+                goto out;
         }
+        if (xdata)
+                local->xdata_req = dict_ref (xdata);
+        op_errno = -afr_fop_handle_lock (frame, frame->this);
+        if (op_errno)
+                goto out;
 
-	return 0;
+        return 0;
 out:
-	AFR_STACK_UNWIND (entrylk, frame, -1, op_errno, NULL);
-
+        afr_fop_lock_unwind (frame, fop, -1, op_errno, NULL);
         return 0;
 }
 
-
-
 int
-afr_fentrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
-		  int32_t op_ret, int32_t op_errno, dict_t *xdata)
-
+afr_entrylk (call_frame_t *frame, xlator_t *this, const char *volume,
+             loc_t *loc, const char *basename, entrylk_cmd cmd,
+             entrylk_type type, dict_t *xdata)
 {
-        afr_local_t *local = NULL;
-        int call_count = -1;
-
-        local = frame->local;
-
-        LOCK (&frame->lock);
-        {
-                if (op_ret == 0)
-                        local->op_ret = 0;
-
-                local->op_errno = op_errno;
-        }
-        UNLOCK (&frame->lock);
-
-        call_count = afr_frame_return (frame);
-
-        if (call_count == 0)
-                AFR_STACK_UNWIND (fentrylk, frame, local->op_ret,
-                                  local->op_errno, xdata);
-
+        afr_handle_entrylk (frame, GF_FOP_ENTRYLK, volume, loc, NULL, basename,
+                            cmd, type, xdata);
         return 0;
 }
 
-
 int
 afr_fentrylk (call_frame_t *frame, xlator_t *this, const char *volume, fd_t *fd,
               const char *basename, entrylk_cmd cmd, entrylk_type type,
-	      dict_t *xdata)
+              dict_t *xdata)
 {
-        afr_private_t *priv = NULL;
-        afr_local_t *local  = NULL;
-        int i = 0;
-        int32_t call_count = 0;
-        int32_t op_errno = ENOMEM;
-
-        priv = this->private;
-
-        local = AFR_FRAME_INIT (frame, op_errno);
-	if (!local)
-		goto out;
-
-        call_count = local->call_count;
-	if (!call_count) {
-		op_errno = ENOTCONN;
-		goto out;
-	}
-
-        for (i = 0; i < priv->child_count; i++) {
-                if (local->child_up[i]) {
-                        STACK_WIND (frame, afr_fentrylk_cbk,
-                                    priv->children[i],
-                                    priv->children[i]->fops->fentrylk,
-                                    volume, fd, basename, cmd, type, xdata);
-
-                        if (!--call_count)
-                                break;
-                }
-        }
-
-	return 0;
-out:
-	AFR_STACK_UNWIND (fentrylk, frame, -1, op_errno, NULL);
-
+        afr_handle_entrylk (frame, GF_FOP_FENTRYLK, volume, NULL, fd, basename,
+                            cmd, type, xdata);
         return 0;
 }
 
-
 int
 afr_statfs_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
 		int op_errno, struct statvfs *statvfs, dict_t *xdata)
diff --git a/xlators/cluster/afr/src/afr-lk-common.c b/xlators/cluster/afr/src/afr-lk-common.c
index 0bd9ffe..6019454 100644
--- a/xlators/cluster/afr/src/afr-lk-common.c
+++ b/xlators/cluster/afr/src/afr-lk-common.c
@@ -669,7 +669,7 @@ afr_unlock_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
 
         if (op_ret < 0 && op_errno != ENOTCONN && op_errno != EBADFD) {
                 gf_msg (this->name, GF_LOG_ERROR, op_errno,
-                        AFR_MSG_INODE_UNLOCK_FAIL,
+                        AFR_MSG_UNLOCK_FAIL,
                         "path=%s gfid=%s: unlock failed on subvolume %s "
                         "with lock owner %s", local->loc.path,
                         loc_gfid_utoa (&(local->loc)),
diff --git a/xlators/cluster/afr/src/afr-messages.h b/xlators/cluster/afr/src/afr-messages.h
index c7af18d..00e689b 100644
--- a/xlators/cluster/afr/src/afr-messages.h
+++ b/xlators/cluster/afr/src/afr-messages.h
@@ -130,11 +130,11 @@
 
 /*!
  * @messageid 108010
- * @diagnosis Inode unlocks failed on a brick.
+ * @diagnosis Unlocks failed on a brick.
  * @recommendedaction Error number in the log should give the reason why it
  * failed. Also observe brick logs for more information.
 */
-#define AFR_MSG_INODE_UNLOCK_FAIL       (GLFS_COMP_BASE_AFR + 10)
+#define AFR_MSG_UNLOCK_FAIL       (GLFS_COMP_BASE_AFR + 10)
 
 /*!
  * @messageid 108011
diff --git a/xlators/cluster/afr/src/afr-transaction.c b/xlators/cluster/afr/src/afr-transaction.c
index bff8226..1ed327c 100644
--- a/xlators/cluster/afr/src/afr-transaction.c
+++ b/xlators/cluster/afr/src/afr-transaction.c
@@ -136,14 +136,6 @@ afr_needs_changelog_update (afr_local_t *local)
         return _gf_false;
 }
 
-static int32_t
-afr_quorum_errno (afr_private_t *priv)
-{
-        if (priv->quorum_reads)
-                return ENOTCONN;
-        return EROFS;
-}
-
 int
 __afr_txn_write_fop (call_frame_t *frame, xlator_t *this)
 {
diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h
index 17b997e..a9cab13 100644
--- a/xlators/cluster/afr/src/afr.h
+++ b/xlators/cluster/afr/src/afr.h
@@ -348,6 +348,11 @@ typedef struct {
 	int readdir_subvol;
 } afr_fd_ctx_t;
 
+typedef enum {
+        AFR_FOP_LOCK_PARALLEL,
+        AFR_FOP_LOCK_SERIAL,
+        AFR_FOP_LOCK_QUORUM_FAILED,
+} afr_fop_lock_state_t;
 
 typedef struct _afr_local {
 	glusterfs_fop_t  op;
@@ -665,10 +670,22 @@ typedef struct _afr_local {
                 struct {
                         char *volume;
                         int32_t cmd;
+                        int32_t in_cmd;
+                        struct gf_flock in_flock;
                         struct gf_flock flock;
+                        void *xdata;
                 } inodelk;
 
                 struct {
+                        char *volume;
+                        char *basename;
+                        entrylk_cmd in_cmd;
+                        entrylk_cmd cmd;
+                        entrylk_type type;
+                        void *xdata;
+                } entrylk;
+
+                struct {
                         off_t offset;
                         gf_seek_what_t what;
                 } seek;
@@ -788,6 +805,7 @@ typedef struct _afr_local {
 
         gf_boolean_t need_full_crawl;
         gf_boolean_t compound;
+        afr_fop_lock_state_t fop_lock_state;
 } afr_local_t;
 
 
@@ -1200,4 +1218,7 @@ void
 afr_compound_cleanup (compound_args_t *args, dict_t *xdata,
                       dict_t *newloc_xdata);
 
+int32_t
+afr_quorum_errno (afr_private_t *priv);
+
 #endif /* __AFR_H__ */
-- 
2.9.3