cb8e9e
From f0e823dfc224c9c3c7a1179453ef6fe087171787 Mon Sep 17 00:00:00 2001
cb8e9e
From: Pranith Kumar K <pkarampu@redhat.com>
cb8e9e
Date: Wed, 15 Jul 2015 06:16:54 +0530
cb8e9e
Subject: [PATCH 244/244] cluster/ec: Handle race between unlock-timer, new lock
cb8e9e
cb8e9e
        Backport of http://review.gluster.org/11670
cb8e9e
cb8e9e
Problem:
cb8e9e
New lock could come at the time timer is on the way to unlock. This was leading
cb8e9e
to crash in timer thread because thread executing new lock can free up the
cb8e9e
timer_link->fop and then timer thread will try to access structures already
cb8e9e
freed.
cb8e9e
cb8e9e
Fix:
cb8e9e
If the timer event is fired, set lock->release to true and wait for unlock to
cb8e9e
complete.
cb8e9e
cb8e9e
Thanks to Xavi and Bhaskar for helping in confirming that this race is the RC.
cb8e9e
Thanks to Kritika for pointing out and explaining how Avati's patch can be used
cb8e9e
to fix this bug.
cb8e9e
cb8e9e
Change-Id: I45fa5470bbc1f03b5f3d133e26d1e0ab24303378
cb8e9e
BUG: 1242423
cb8e9e
Signed-off-by: Pranith Kumar K <pkarampu@redhat.com>
cb8e9e
Reviewed-on: https://code.engineering.redhat.com/gerrit/53061
cb8e9e
---
cb8e9e
 xlators/cluster/ec/src/ec-common.c |   33 ++++++++++++++-------------------
cb8e9e
 xlators/cluster/ec/src/ec-common.h |    1 -
cb8e9e
 xlators/cluster/ec/src/ec.c        |   33 +++------------------------------
cb8e9e
 3 files changed, 17 insertions(+), 50 deletions(-)
cb8e9e
cb8e9e
diff --git a/xlators/cluster/ec/src/ec-common.c b/xlators/cluster/ec/src/ec-common.c
cb8e9e
index b2d0348..ec6a4aa 100644
cb8e9e
--- a/xlators/cluster/ec/src/ec-common.c
cb8e9e
+++ b/xlators/cluster/ec/src/ec-common.c
cb8e9e
@@ -1358,13 +1358,20 @@ void ec_lock(ec_fop_data_t *fop)
cb8e9e
         if (lock->timer != NULL) {
cb8e9e
             GF_ASSERT (lock->release == _gf_false);
cb8e9e
             timer_link = lock->timer->data;
cb8e9e
-            ec_trace("UNLOCK_CANCELLED", timer_link->fop, "lock=%p", lock);
cb8e9e
-            gf_timer_call_cancel(fop->xl->ctx, lock->timer);
cb8e9e
-            lock->timer = NULL;
cb8e9e
-
cb8e9e
-            lock->refs--;
cb8e9e
-            /* There should remain at least 1 ref, the current one. */
cb8e9e
-            GF_ASSERT(lock->refs > 0);
cb8e9e
+            if (gf_timer_call_cancel(fop->xl->ctx, lock->timer) == 0) {
cb8e9e
+                    ec_trace("UNLOCK_CANCELLED", timer_link->fop,
cb8e9e
+                             "lock=%p", lock);
cb8e9e
+                    lock->timer = NULL;
cb8e9e
+                    lock->refs--;
cb8e9e
+                    /* There should remain at least 1 ref, the current one. */
cb8e9e
+                    GF_ASSERT(lock->refs > 0);
cb8e9e
+            } else {
cb8e9e
+                    /* Timer expired and on the way to unlock.
cb8e9e
+                     * Set lock->release to _gf_true, so that this
cb8e9e
+                     * lock will be put in frozen list*/
cb8e9e
+                    timer_link = NULL;
cb8e9e
+                    lock->release = _gf_true;
cb8e9e
+            }
cb8e9e
         }
cb8e9e
 
cb8e9e
         GF_ASSERT(list_empty(&link->wait_list));
cb8e9e
@@ -1815,18 +1822,6 @@ void ec_unlock(ec_fop_data_t *fop)
cb8e9e
     }
cb8e9e
 }
cb8e9e
 
cb8e9e
-void
cb8e9e
-ec_unlock_force(ec_fop_data_t *fop)
cb8e9e
-{
cb8e9e
-        int32_t i;
cb8e9e
-
cb8e9e
-        for (i = 0; i < fop->lock_count; i++) {
cb8e9e
-                ec_trace("UNLOCK_FORCED", fop, "lock=%p", &fop->locks[i]);
cb8e9e
-
cb8e9e
-                ec_unlock_timer_del(&fop->locks[i]);
cb8e9e
-        }
cb8e9e
-}
cb8e9e
-
cb8e9e
 void ec_flush_size_version(ec_fop_data_t *fop)
cb8e9e
 {
cb8e9e
     GF_ASSERT(fop->lock_count == 1);
cb8e9e
diff --git a/xlators/cluster/ec/src/ec-common.h b/xlators/cluster/ec/src/ec-common.h
cb8e9e
index 4c7fe0c..a851fb3 100644
cb8e9e
--- a/xlators/cluster/ec/src/ec-common.h
cb8e9e
+++ b/xlators/cluster/ec/src/ec-common.h
cb8e9e
@@ -91,7 +91,6 @@ void ec_lock_prepare_fd(ec_fop_data_t *fop, fd_t *fd, uint32_t flags);
cb8e9e
 void ec_lock(ec_fop_data_t * fop);
cb8e9e
 void ec_lock_reuse(ec_fop_data_t *fop);
cb8e9e
 void ec_unlock(ec_fop_data_t * fop);
cb8e9e
-void ec_unlock_force(ec_fop_data_t *fop);
cb8e9e
 
cb8e9e
 gf_boolean_t ec_get_inode_size(ec_fop_data_t *fop, inode_t *inode,
cb8e9e
                                uint64_t *size);
cb8e9e
diff --git a/xlators/cluster/ec/src/ec.c b/xlators/cluster/ec/src/ec.c
cb8e9e
index 4673c11..2885b01 100644
cb8e9e
--- a/xlators/cluster/ec/src/ec.c
cb8e9e
+++ b/xlators/cluster/ec/src/ec.c
cb8e9e
@@ -395,38 +395,11 @@ ec_handle_down (xlator_t *this, ec_t *ec, int32_t idx)
cb8e9e
 }
cb8e9e
 
cb8e9e
 gf_boolean_t
cb8e9e
-ec_force_unlocks(ec_t *ec)
cb8e9e
+ec_disable_delays(ec_t *ec)
cb8e9e
 {
cb8e9e
-        struct list_head list;
cb8e9e
-        ec_fop_data_t *fop;
cb8e9e
-
cb8e9e
-        if (list_empty(&ec->pending_fops)) {
cb8e9e
-                return _gf_true;
cb8e9e
-        }
cb8e9e
-
cb8e9e
-        INIT_LIST_HEAD(&list);
cb8e9e
-
cb8e9e
-        /* All pending fops when GF_EVENT_PARENT_DOWN is received should only
cb8e9e
-         * be fops waiting for a delayed unlock. However the unlock can
cb8e9e
-         * generate new fops. We don't want to trverse these new fops while
cb8e9e
-         * forcing unlocks, so we move all fops to a temporal list. To process
cb8e9e
-         * them without interferences.*/
cb8e9e
-        list_splice_init(&ec->pending_fops, &list);
cb8e9e
-
cb8e9e
-        while (!list_empty(&list)) {
cb8e9e
-                fop = list_entry(list.next, ec_fop_data_t, pending_list);
cb8e9e
-                list_move_tail(&fop->pending_list, &ec->pending_fops);
cb8e9e
-
cb8e9e
-                UNLOCK(&ec->lock);
cb8e9e
-
cb8e9e
-                ec_unlock_force(fop);
cb8e9e
-
cb8e9e
-                LOCK(&ec->lock);
cb8e9e
-        }
cb8e9e
-
cb8e9e
         ec->shutdown = _gf_true;
cb8e9e
 
cb8e9e
-        return list_empty(&ec->pending_fops);
cb8e9e
+        return list_empty (&ec->pending_fops);
cb8e9e
 }
cb8e9e
 
cb8e9e
 void
cb8e9e
@@ -482,7 +455,7 @@ ec_notify (xlator_t *this, int32_t event, void *data, void *data2)
cb8e9e
         } else if (event == GF_EVENT_PARENT_DOWN) {
cb8e9e
                 /* If there aren't pending fops running after we have waken up
cb8e9e
                  * them, we immediately propagate the notification. */
cb8e9e
-                propagate = ec_force_unlocks(ec);
cb8e9e
+                propagate = ec_disable_delays(ec);
cb8e9e
                 goto unlock;
cb8e9e
         }
cb8e9e
 
cb8e9e
-- 
cb8e9e
1.7.1
cb8e9e