12a457
From 13baca9f9e2e86a66713db708d06fa4927d298b5 Mon Sep 17 00:00:00 2001
12a457
From: Xavier Hernandez <xhernandez@datalab.es>
12a457
Date: Thu, 28 Apr 2016 08:42:40 +0200
12a457
Subject: [PATCH 173/178] cluster/ec: Fix issues with eager locking
12a457
12a457
Due to a race in timer cancellation, in some cases it was possible
12a457
to unlock the lock while another concurrent fop that needed it
12a457
continues execution as if it were not released.
12a457
12a457
This patch also fixes an issue that caused a lock to not be released
12a457
if an error was found while preparing ec_update_size_version().
12a457
12a457
master -
12a457
http://review.gluster.org/#/c/14112/
12a457
12a457
release-3.7 -
12a457
http://review.gluster.org/#/c/14174/
12a457
12a457
release-3.8 -
12a457
http://review.gluster.org/#/c/14206/
12a457
12a457
Change-Id: I1344a3f5ecfc333f05a09e62653838264c9c26b1
12a457
BUG: 1330997
12a457
Signed-off-by: Xavier Hernandez <xhernandez@datalab.es>
12a457
Reviewed-on: http://review.gluster.org/14112
12a457
Smoke: Gluster Build System <jenkins@build.gluster.com>
12a457
CentOS-regression: Gluster Build System <jenkins@build.gluster.com>
12a457
Reviewed-by: Chen Chen <chenchen@smartquerier.com>
12a457
NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
12a457
Signed-off-by: Ashish Pandey <aspandey@redhat.com>
12a457
Reviewed-on: https://code.engineering.redhat.com/gerrit/75107
12a457
Reviewed-by: Pranith Kumar Karampuri <pkarampu@redhat.com>
12a457
Tested-by: Pranith Kumar Karampuri <pkarampu@redhat.com>
12a457
---
12a457
 xlators/cluster/ec/src/ec-common.c |  242 ++++++++++++++++++++++++++----------
12a457
 xlators/cluster/ec/src/ec-data.h   |   25 +++-
12a457
 2 files changed, 192 insertions(+), 75 deletions(-)
12a457
12a457
diff --git a/xlators/cluster/ec/src/ec-common.c b/xlators/cluster/ec/src/ec-common.c
12a457
index de0e597..58cfc73 100644
12a457
--- a/xlators/cluster/ec/src/ec-common.c
12a457
+++ b/xlators/cluster/ec/src/ec-common.c
12a457
@@ -711,8 +711,7 @@ void ec_lock_insert(ec_fop_data_t *fop, ec_lock_t *lock, uint32_t flags,
12a457
     link->update[EC_METADATA_TXN] = (flags & EC_UPDATE_META) != 0;
12a457
     link->base = base;
12a457
 
12a457
-    lock->refs++;
12a457
-    lock->inserted++;
12a457
+    lock->refs_pending++;
12a457
 }
12a457
 
12a457
 void ec_lock_prepare_inode_internal(ec_fop_data_t *fop, loc_t *loc,
12a457
@@ -1347,6 +1346,7 @@ ec_lock_wake_shared(ec_lock_t *lock, struct list_head *list)
12a457
         list_move_tail(&link->wait_list, list);
12a457
 
12a457
         list_add_tail(&link->owner_list, &lock->owners);
12a457
+        lock->refs_owners++;
12a457
 
12a457
         ec_lock_update_fd(lock, fop);
12a457
     }
12a457
@@ -1478,6 +1478,8 @@ ec_lock_assign_owner(ec_lock_link_t *link)
12a457
     ec_lock_link_t *timer_link = NULL;
12a457
     gf_boolean_t assigned = _gf_false;
12a457
 
12a457
+    /* The link cannot be in any list because we have just finished preparing
12a457
+     * it. */
12a457
     GF_ASSERT(list_empty(&link->wait_list));
12a457
 
12a457
     fop = link->fop;
12a457
@@ -1485,27 +1487,85 @@ ec_lock_assign_owner(ec_lock_link_t *link)
12a457
 
12a457
     LOCK(&lock->loc.inode->lock);
12a457
 
12a457
-    GF_ASSERT (lock->inserted > 0);
12a457
-    lock->inserted--;
12a457
+    /* Since the link has just been prepared but it's not active yet, the
12a457
+     * refs_pending must be one at least (the ref owned by this link). */
12a457
+    GF_ASSERT (lock->refs_pending > 0);
12a457
+    /* The link is not pending any more. It will be assigned to the owner,
12a457
+     * waiting or frozen list. */
12a457
+    lock->refs_pending--;
12a457
 
12a457
     if (lock->release) {
12a457
         ec_trace("LOCK_QUEUE_FREEZE", fop, "lock=%p", lock);
12a457
 
12a457
-        list_add_tail(&link->wait_list, &lock->frozen);
12a457
+        /* When lock->release is set, we'll unlock the lock as soon as
12a457
+         * possible, meaning that we won't use a timer. */
12a457
+        GF_ASSERT(lock->timer == NULL);
12a457
 
12a457
-        /* The lock is frozen, so we move the current reference to refs_frozen.
12a457
-         * After that, there should remain at least one ref belonging to the
12a457
-         * lock that is processing the release. */
12a457
-        lock->refs--;
12a457
-        GF_ASSERT(lock->refs > 0);
12a457
-        lock->refs_frozen++;
12a457
+        /* The lock is marked to be released. We can still have owners and fops
12a457
+         * in the waiting ilist f they have been added before the lock has been
12a457
+         * marked to be released. However new fops are put into the frozen list
12a457
+         * to wait for the next unlock/lock cycle. */
12a457
+        list_add_tail(&link->wait_list, &lock->frozen);
12a457
 
12a457
         goto unlock;
12a457
     }
12a457
 
12a457
+    /* The lock is not marked to be released, so the frozen list should be
12a457
+     * empty. */
12a457
+    GF_ASSERT(list_empty(&lock->frozen));
12a457
+
12a457
+    if (lock->timer != NULL) {
12a457
+        /* We are trying to acquire a lock that has an unlock timer active.
12a457
+         * This means that the lock must be idle, i.e. no fop can be in the
12a457
+         * owner, waiting or frozen lists. It also means that the lock cannot
12a457
+         * have been marked as being released (this is done without timers)
12a457
+         * and it must not be exclusive. There should only be one owner
12a457
+         * reference, but it's possible that some fops are being prepared to
12a457
+         * use this lock. */
12a457
+        GF_ASSERT ((lock->exclusive == 0) && (lock->refs_owners == 1) &&
12a457
+                   list_empty(&lock->owners) && list_empty(&lock->waiting));
12a457
+
12a457
+        /* We take the timer_link before cancelling the timer, since a
12a457
+         * successful cancellation will destroy it. It must not be NULL
12a457
+         * because it references the fop responsible for the delayed unlock
12a457
+         * that we are currently trying to cancel. */
12a457
+        timer_link = lock->timer->data;
12a457
+        GF_ASSERT(timer_link != NULL);
12a457
+
12a457
+        gf_timer_call_cancel(fop->xl->ctx, lock->timer);
12a457
+        ec_trace("UNLOCK_CANCELLED", timer_link->fop, "lock=%p", lock);
12a457
+
12a457
+        /* We have two options here:
12a457
+         *
12a457
+         * 1. The timer has been successfully cancelled.
12a457
+         *
12a457
+         *    This is the easiest case and we can continue with the currently
12a457
+         *    acquired lock.
12a457
+         *
12a457
+         * 2. The timer callback has already been fired.
12a457
+         *
12a457
+         *    In this case we have not been able to cancel the timer before
12a457
+         *    the timer callback has been fired, but we also know that
12a457
+         *    lock->timer != NULL. This means that the timer callback is still
12a457
+         *    trying to acquire the inode mutex that we currently own. We are
12a457
+         *    safe until we release it. In this case we can safely clear
12a457
+         *    lock->timer. This will cause that the timer callback does nothing
12a457
+         *    once it acquires the mutex.
12a457
+         *
12a457
+         * In both cases we must release the owner reference assigned to the
12a457
+         * fop that was handling the unlock because ec_unlock_now() won't be
12a457
+         * called for that fop.
12a457
+         */
12a457
+        lock->timer = NULL;
12a457
+        lock->refs_owners--;
12a457
+    }
12a457
+
12a457
     lock->exclusive |= (fop->flags & EC_FLAG_LOCK_SHARED) == 0;
12a457
 
12a457
     if (!list_empty(&lock->owners)) {
12a457
+        /* There are other owners of this lock. We can only take ownership if
12a457
+         * the lock is already acquired and can be shared. Otherwise we need
12a457
+         * to wait. */
12a457
         if (!lock->acquired || (lock->exclusive != 0)) {
12a457
             ec_trace("LOCK_QUEUE_WAIT", fop, "lock=%p", lock);
12a457
 
12a457
@@ -1513,36 +1573,24 @@ ec_lock_assign_owner(ec_lock_link_t *link)
12a457
 
12a457
             goto unlock;
12a457
         }
12a457
-    } else if (lock->timer != NULL) {
12a457
-        GF_ASSERT (lock->release == _gf_false);
12a457
-
12a457
-        timer_link = lock->timer->data;
12a457
-        if (gf_timer_call_cancel(fop->xl->ctx, lock->timer) == 0) {
12a457
-            ec_trace("UNLOCK_CANCELLED", timer_link->fop, "lock=%p", lock);
12a457
-            lock->timer = NULL;
12a457
-            lock->refs--;
12a457
-            /* There should remain at least 1 ref, the current one. */
12a457
-            GF_ASSERT(lock->refs > 0);
12a457
-        } else {
12a457
-            /* Timer expired and on the way to unlock.
12a457
-             * Set lock->release to _gf_true, so that this
12a457
-             * lock will be put in frozen list*/
12a457
-            timer_link = NULL;
12a457
-            lock->release = _gf_true;
12a457
-        }
12a457
     }
12a457
 
12a457
     list_add_tail(&link->owner_list, &lock->owners);
12a457
+    lock->refs_owners++;
12a457
 
12a457
     assigned = _gf_true;
12a457
 
12a457
 unlock:
12a457
     if (!assigned) {
12a457
+        /* We have not been able to take ownership of this lock. The fop must
12a457
+         * be put to sleep. */
12a457
         ec_sleep(fop);
12a457
     }
12a457
 
12a457
     UNLOCK(&lock->loc.inode->lock);
12a457
 
12a457
+    /* If we have cancelled the timer, we need to resume the fop that was
12a457
+     * waiting for it. */
12a457
     if (timer_link != NULL) {
12a457
         ec_resume(timer_link->fop, 0);
12a457
     }
12a457
@@ -1566,8 +1614,14 @@ ec_lock_next_owner(ec_lock_link_t *link, ec_cbk_data_t *cbk,
12a457
 
12a457
     ec_trace("LOCK_DONE", fop, "lock=%p", lock);
12a457
 
12a457
-    GF_ASSERT(!list_empty(&link->owner_list));
12a457
+    /* Current link must belong to the owner list of the lock. We don't
12a457
+     * decrement lock->refs_owners here because the inode mutex is released
12a457
+     * before ec_unlock() is called and we need to know when the last owner
12a457
+     * unlocks the lock to do proper cleanup. lock->refs_owners is used for
12a457
+     * this task. */
12a457
+    GF_ASSERT((lock->refs_owners > 0) && !list_empty(&link->owner_list));
12a457
     list_del_init(&link->owner_list);
12a457
+
12a457
     lock->release |= release;
12a457
 
12a457
     if ((fop->error == 0) && (cbk != NULL) && (cbk->op_ret >= 0)) {
12a457
@@ -1625,6 +1679,7 @@ ec_lock_unfreeze(ec_lock_link_t *link)
12a457
 {
12a457
     struct list_head list;
12a457
     ec_lock_t *lock;
12a457
+    gf_boolean_t destroy = _gf_false;
12a457
 
12a457
     lock = link->lock;
12a457
 
12a457
@@ -1632,18 +1687,30 @@ ec_lock_unfreeze(ec_lock_link_t *link)
12a457
 
12a457
     LOCK(&lock->loc.inode->lock);
12a457
 
12a457
-    lock->acquired = _gf_false;
12a457
+    /* The lock must be marked to be released here, since we have just released
12a457
+     * it and any attempt to assign it to more fops must have added them to the
12a457
+     * frozen list. We can only have one active reference here: the one that
12a457
+     * is processing this unfreeze. */
12a457
+    GF_ASSERT(lock->release && (lock->refs_owners == 1));
12a457
     lock->release = _gf_false;
12a457
-    lock->refs--;
12a457
+    lock->refs_owners = 0;
12a457
 
12a457
-    GF_ASSERT (lock->refs == lock->inserted);
12a457
-    GF_ASSERT(lock->exclusive == 0);
12a457
-    GF_ASSERT(list_empty(&lock->waiting) && list_empty(&lock->owners));
12a457
+    lock->acquired = _gf_false;
12a457
+
12a457
+    /* We are unfreezing a lock. This means that the lock has already been
12a457
+     * released. In this state it shouldn't be exclusive nor have a pending
12a457
+     * timer nor have any owner, and the waiting list should be empty. Only
12a457
+     * the frozen list can contain some fop. */
12a457
+    GF_ASSERT((lock->exclusive == 0) && (lock->timer == NULL) &&
12a457
+              list_empty(&lock->waiting) && list_empty(&lock->owners));
12a457
 
12a457
+    /* We move all frozen fops to the waiting list. */
12a457
     list_splice_init(&lock->frozen, &lock->waiting);
12a457
-    lock->refs += lock->refs_frozen;
12a457
-    lock->refs_frozen = 0;
12a457
-    if (lock->refs == 0) {
12a457
+
12a457
+    /* If we don't have any fop waiting nor there are any prepared fops using
12a457
+     * this lock, we can finally dispose it. */
12a457
+    destroy = list_empty(&lock->waiting) && (lock->refs_pending == 0);
12a457
+    if (destroy) {
12a457
         ec_trace("LOCK_DESTROY", link->fop, "lock=%p", lock);
12a457
 
12a457
         lock->ctx->inode_lock = NULL;
12a457
@@ -1657,7 +1724,7 @@ ec_lock_unfreeze(ec_lock_link_t *link)
12a457
 
12a457
     ec_lock_resume_shared(&list);
12a457
 
12a457
-    if (lock->refs == 0) {
12a457
+    if (destroy) {
12a457
         ec_lock_destroy(lock);
12a457
     }
12a457
 }
12a457
@@ -1770,9 +1837,6 @@ ec_update_size_version(ec_lock_link_t *link, uint64_t *version,
12a457
 
12a457
     fop = link->fop;
12a457
 
12a457
-    GF_ASSERT(version[0] < 0x100000000);
12a457
-    GF_ASSERT(version[1] < 0x100000000);
12a457
-
12a457
     ec_trace("UPDATE", fop, "version=%ld/%ld, size=%ld, dirty=%ld/%ld",
12a457
              version[0], version[1], size, dirty[0], dirty[1]);
12a457
 
12a457
@@ -1814,7 +1878,7 @@ ec_update_size_version(ec_lock_link_t *link, uint64_t *version,
12a457
         }
12a457
     }
12a457
 
12a457
-    /* If config information is not know, we request it now. */
12a457
+    /* If config information is not known, we request it now. */
12a457
     if ((lock->loc.inode->ia_type == IA_IFREG) && !ctx->have_config) {
12a457
         /* A failure requesting this xattr is ignored because it's not
12a457
          * absolutely required right now. */
12a457
@@ -1850,6 +1914,13 @@ out:
12a457
 
12a457
     gf_msg (fop->xl->name, GF_LOG_ERROR, -err, EC_MSG_SIZE_VERS_UPDATE_FAIL,
12a457
             "Unable to update version and size");
12a457
+
12a457
+    if ((fop->parent->id != GF_FOP_FLUSH) &&
12a457
+        (fop->parent->id != GF_FOP_FSYNC) &&
12a457
+        (fop->parent->id != GF_FOP_FSYNCDIR)) {
12a457
+        ec_unlock_lock(fop->data);
12a457
+    }
12a457
+
12a457
 }
12a457
 
12a457
 gf_boolean_t
12a457
@@ -1900,7 +1971,6 @@ ec_unlock_now(ec_lock_link_t *link)
12a457
 void
12a457
 ec_unlock_timer_del(ec_lock_link_t *link)
12a457
 {
12a457
-        int32_t before = 0;
12a457
         ec_lock_t *lock;
12a457
         inode_t *inode;
12a457
         gf_boolean_t now = _gf_false;
12a457
@@ -1922,22 +1992,24 @@ ec_unlock_timer_del(ec_lock_link_t *link)
12a457
         if (lock->timer != NULL) {
12a457
                 ec_trace("UNLOCK_DELAYED", link->fop, "lock=%p", lock);
12a457
 
12a457
+                /* The unlock timer has expired without anyone cancelling it.
12a457
+                 * This means that it shouldn't have any owner, and the
12a457
+                 * waiting and frozen lists should be empty. It shouldn't have
12a457
+                 * been marked as release nor be exclusive either. It must have
12a457
+                 * only one owner reference, but there can be fops being
12a457
+                 * prepared though. */
12a457
+                GF_ASSERT(!lock->release && (lock->exclusive == 0) &&
12a457
+                          (lock->refs_owners == 1) &&
12a457
+                          list_empty(&lock->owners) &&
12a457
+                          list_empty(&lock->waiting) &&
12a457
+                          list_empty(&lock->frozen));
12a457
+
12a457
                 gf_timer_call_cancel(link->fop->xl->ctx, lock->timer);
12a457
                 lock->timer = NULL;
12a457
 
12a457
+                /* Any fop being processed from now on, will need to wait
12a457
+                 * until the next unlock/lock cycle. */
12a457
                 lock->release = now = _gf_true;
12a457
-
12a457
-                /* TODO: If the assertion is really true, following code is
12a457
-                 *       not needed. */
12a457
-                GF_ASSERT(list_empty(&lock->waiting));
12a457
-
12a457
-                before = lock->refs + lock->refs_frozen;
12a457
-                list_splice_init(&lock->waiting, &lock->frozen);
12a457
-                lock->refs_frozen += lock->refs - lock->inserted - 1;
12a457
-                lock->refs = 1 + lock->inserted;
12a457
-                /* We moved around the locks, so total number of locks shouldn't
12a457
-                 * change by this operation*/
12a457
-                GF_ASSERT (before == (lock->refs + lock->refs_frozen));
12a457
         }
12a457
 
12a457
         UNLOCK(&inode->lock);
12a457
@@ -1961,24 +2033,50 @@ void ec_unlock_timer_add(ec_lock_link_t *link)
12a457
 
12a457
     LOCK(&lock->loc.inode->lock);
12a457
 
12a457
-    GF_ASSERT(lock->timer == NULL);
12a457
+    /* We are trying to unlock the lock. We can have multiple scenarios here,
12a457
+     * but all of them need to have lock->timer == NULL:
12a457
+     *
12a457
+     * 1. There are other owners currently running that can call ec_unlock().
12a457
+     *
12a457
+     *    None of them can have started the timer until the last one. But this
12a457
+     *    call should be the consequence of this lastest one.
12a457
+     *
12a457
+     * 2. There are fops in the waiting or frozen lists.
12a457
+     *
12a457
+     *    These fops cannot call ec_unlock(). So we should be here.
12a457
+     *
12a457
+     * We must reach here with at least one owner reference.
12a457
+     */
12a457
+    GF_ASSERT((lock->timer == NULL) && (lock->refs_owners > 0));
12a457
 
12a457
-    if ((lock->refs - lock->inserted) > 1) {
12a457
+    /* If the fop detects that a heal is needed, we mark the lock to be
12a457
+     * released as soon as possible. */
12a457
+    lock->release |= ec_fop_needs_heal(fop);
12a457
+
12a457
+    if (lock->refs_owners > 1) {
12a457
         ec_trace("UNLOCK_SKIP", fop, "lock=%p", lock);
12a457
 
12a457
-        lock->refs--;
12a457
+        /* If there are other owners we cannot do anything else with the lock.
12a457
+         * Note that the current fop has already been removed from the owners
12a457
+         * list in ec_lock_reuse(). */
12a457
+        lock->refs_owners--;
12a457
 
12a457
         UNLOCK(&lock->loc.inode->lock);
12a457
     } else if (lock->acquired) {
12a457
-        ec_t *ec = fop->xl->private;
12a457
+        /* There are no other owners and the lock is acquired. If there were
12a457
+         * fops waiting, at least one of them should have been promoted to an
12a457
+         * owner, so the waiting list should be empty. */
12a457
+        GF_ASSERT(list_empty(&lock->owners) && list_empty(&lock->waiting));
12a457
 
12a457
-        GF_ASSERT(list_empty(&lock->owners));
12a457
+        ec_t *ec = fop->xl->private;
12a457
 
12a457
+        /* If everything goes as expected this fop will be put to sleep until
12a457
+         * the timer callback is executed. */
12a457
         ec_sleep(fop);
12a457
 
12a457
-        /* If healing is needed, the lock needs to be released due to
12a457
-         * contention, or ec is shutting down, do not delay lock release. */
12a457
-        if (!lock->release && !ec_fop_needs_heal(fop) && !ec->shutdown) {
12a457
+        /* If the lock needs to be released, or ec is shutting down, do not
12a457
+         * delay lock release. */
12a457
+        if (!lock->release && !ec->shutdown) {
12a457
             ec_trace("UNLOCK_DELAY", fop, "lock=%p, release=%d", lock,
12a457
                      lock->release);
12a457
 
12a457
@@ -1989,9 +2087,10 @@ void ec_unlock_timer_add(ec_lock_link_t *link)
12a457
             if (lock->timer == NULL) {
12a457
                 gf_msg(fop->xl->name, GF_LOG_WARNING, ENOMEM,
12a457
                        EC_MSG_UNLOCK_DELAY_FAILED,
12a457
-                       "Unable to delay an "
12a457
-                       "unlock");
12a457
+                       "Unable to delay an unlock");
12a457
 
12a457
+                /* We are unable to create a new timer. We immediately release
12a457
+                 * the lock. */
12a457
                 lock->release = now = _gf_true;
12a457
             }
12a457
         } else {
12a457
@@ -2006,10 +2105,17 @@ void ec_unlock_timer_add(ec_lock_link_t *link)
12a457
             ec_unlock_now(link);
12a457
         }
12a457
     } else {
12a457
+        /* There are no owners and the lock is not acquired. This can only
12a457
+         * happen if a lock attempt has failed and we get to the unlock step
12a457
+         * of the fop. As in the previous case, the waiting list must be
12a457
+         * empty. */
12a457
+        GF_ASSERT(list_empty(&lock->owners) && list_empty(&lock->waiting));
12a457
+
12a457
+        /* We need to mark the lock to be released to correctly handle fops
12a457
+         * that may get in after we release the inode mutex but before
12a457
+         * ec_lock_unfreeze() is processed. */
12a457
         lock->release = _gf_true;
12a457
 
12a457
-        GF_ASSERT(list_empty(&lock->owners));
12a457
-
12a457
         UNLOCK(&lock->loc.inode->lock);
12a457
 
12a457
         ec_lock_unfreeze(link);
12a457
@@ -2052,7 +2158,7 @@ void ec_lock_reuse(ec_fop_data_t *fop)
12a457
             }
12a457
         }
12a457
     } else {
12a457
-        /* If eager lock is disabled or If we haven't get
12a457
+        /* If eager lock is disabled or if we haven't get
12a457
          * an answer with enough quorum, we always release
12a457
          * the lock. */
12a457
         release = _gf_true;
12a457
diff --git a/xlators/cluster/ec/src/ec-data.h b/xlators/cluster/ec/src/ec-data.h
12a457
index 9107b4b..f4214ec 100644
12a457
--- a/xlators/cluster/ec/src/ec-data.h
12a457
+++ b/xlators/cluster/ec/src/ec-data.h
12a457
@@ -139,17 +139,28 @@ struct _ec_lock
12a457
 {
12a457
     ec_inode_t        *ctx;
12a457
     gf_timer_t        *timer;
12a457
-    struct list_head   owners;  /* List of owners of this lock. */
12a457
-    struct list_head   waiting; /* Queue of requests being serviced. */
12a457
-    struct list_head   frozen;  /* Queue of requests that will be serviced in
12a457
-                                   the next unlock/lock cycle. */
12a457
+
12a457
+    /* List of owners of this lock. All fops added to this list are running
12a457
+     * concurrently. */
12a457
+    struct list_head   owners;
12a457
+
12a457
+    /* List of fops waiting to be an owner of the lock. Fops are added to this
12a457
+     * list when the current owner has an incompatible access (shared vs
12a457
+     * exclusive) or the lock is not acquired yet. */
12a457
+    struct list_head   waiting;
12a457
+
12a457
+    /* List of fops that will wait until the next unlock/lock cycle. This
12a457
+     * happens when the currently acquired lock is decided to be released as
12a457
+     * soon as possible. In this case, all frozen fops will be continued only
12a457
+     * after the lock is reacquired. */
12a457
+    struct list_head   frozen;
12a457
+
12a457
     int32_t            exclusive;
12a457
     uintptr_t          mask;
12a457
     uintptr_t          good_mask;
12a457
     uintptr_t          healing;
12a457
-    int32_t            refs;
12a457
-    int32_t            refs_frozen;
12a457
-    int32_t            inserted;
12a457
+    uint32_t           refs_owners;  /* Refs for fops owning the lock */
12a457
+    uint32_t           refs_pending; /* Refs assigned to fops being prepared */
12a457
     gf_boolean_t       acquired;
12a457
     gf_boolean_t       getting_size;
12a457
     gf_boolean_t       release;
12a457
-- 
12a457
1.7.1
12a457