From 87d8070f80487322a1736846a78725fd88f8de34 Mon Sep 17 00:00:00 2001 From: Pranith Kumar K Date: Tue, 20 Aug 2019 13:27:24 +0530 Subject: [PATCH 291/297] cluster/ec: Mark release only when it is acquired Problem: Mount-1 Mount-2 1)Tries to acquire lock on 'dir1' 1)Tries to acquire lock on 'dir1' 2)Lock is granted on brick-0 2)Lock gets EAGAIN on brick-0 and leads to blocking lock on brick-0 3)Gets a lock-contention 3) Doesn't matter what happens on mount-2 notification, marks lock->release from here on. to true. 4)New fop comes on 'dir1' which will be put in frozen list as lock->release is set to true. 5) Lock acquisition from step-2 fails because 3 bricks went down in 4+2 setup. Fop on mount-1 which is put in frozen list will hang because no codepath will move it from frozen list to any other list and the lock will not be retried. Fix: Don't set lock->release to true if lock is not acquired at the time of lock-contention-notification Upstream-patch: https://review.gluster.org/c/glusterfs/+/23272 fixes: bz#1731896 Change-Id: Ie6630db8735ccf372cc54b873a3a3aed7a6082b7 Signed-off-by: Pranith Kumar K Reviewed-on: https://code.engineering.redhat.com/gerrit/180870 Tested-by: RHGS Build Bot Reviewed-by: Ashish Pandey Reviewed-by: Sunil Kumar Heggodu Gopala Acharya --- xlators/cluster/ec/src/ec-common.c | 20 ++++++++++++++++++-- xlators/cluster/ec/src/ec-types.h | 1 + 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/xlators/cluster/ec/src/ec-common.c b/xlators/cluster/ec/src/ec-common.c index 2e59180..5cae37b 100644 --- a/xlators/cluster/ec/src/ec-common.c +++ b/xlators/cluster/ec/src/ec-common.c @@ -1867,6 +1867,10 @@ ec_lock_acquired(ec_lock_link_t *link) LOCK(&lock->loc.inode->lock); lock->acquired = _gf_true; + if (lock->contention) { + lock->release = _gf_true; + lock->contention = _gf_false; + } ec_lock_update_fd(lock, fop); ec_lock_wake_shared(lock, &list); @@ -1892,15 +1896,20 @@ ec_locked(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, ec_lock_link_t *link = NULL; ec_lock_t *lock = NULL; + link = fop->data; + lock = link->lock; if (op_ret >= 0) { - link = fop->data; - lock = link->lock; lock->mask = lock->good_mask = fop->good; lock->healing = 0; ec_lock_acquired(link); ec_lock(fop->parent); } else { + LOCK(&lock->loc.inode->lock); + { + lock->contention = _gf_false; + } + UNLOCK(&lock->loc.inode->lock); gf_msg(this->name, GF_LOG_WARNING, op_errno, EC_MSG_PREOP_LOCK_FAILED, "Failed to complete preop lock"); } @@ -2547,6 +2556,13 @@ ec_lock_release(ec_t *ec, inode_t *inode) gf_msg_debug(ec->xl->name, 0, "Releasing inode %p due to lock contention", inode); + if (!lock->acquired) { + /* This happens if some bricks already got the lock while inodelk is in + * progress. Set release to true after lock is acquired*/ + lock->contention = _gf_true; + goto done; + } + /* The lock is not marked to be released, so the frozen list should be * empty. */ GF_ASSERT(list_empty(&lock->frozen)); diff --git a/xlators/cluster/ec/src/ec-types.h b/xlators/cluster/ec/src/ec-types.h index ea4f6ad..34a9768 100644 --- a/xlators/cluster/ec/src/ec-types.h +++ b/xlators/cluster/ec/src/ec-types.h @@ -267,6 +267,7 @@ struct _ec_lock { uint32_t refs_pending; /* Refs assigned to fops being prepared */ uint32_t waiting_flags; /*Track xattrop/dirty marking*/ gf_boolean_t acquired; + gf_boolean_t contention; gf_boolean_t unlock_now; gf_boolean_t release; gf_boolean_t query; -- 1.8.3.1