cb8e9e
From f83c5ad964c09857faa22dd99fb2537f290a979f Mon Sep 17 00:00:00 2001
cb8e9e
From: Pranith Kumar K <pkarampu@redhat.com>
cb8e9e
Date: Tue, 23 Jun 2015 06:39:49 +0530
cb8e9e
Subject: [PATCH 128/129] cluster/ec: Avoid parallel executions of the same state machine
cb8e9e
cb8e9e
        Backport of http://review.gluster.org/11317
cb8e9e
cb8e9e
In very rare circumstances it was possible that a subfop started
cb8e9e
by another fop could finish fast enough to cause that two or more
cb8e9e
instances of the same state machine be executing at the same time.
cb8e9e
cb8e9e
Change-Id: I319924a18bd3f88115e751a66f8f4560435e0e0e
cb8e9e
BUG: 1230513
cb8e9e
Signed-off-by: Xavier Hernandez <xhernandez@datalab.es>
cb8e9e
Reviewed-on: https://code.engineering.redhat.com/gerrit/51301
cb8e9e
Reviewed-by: Pranith Kumar Karampuri <pkarampu@redhat.com>
cb8e9e
Tested-by: Pranith Kumar Karampuri <pkarampu@redhat.com>
cb8e9e
---
cb8e9e
 xlators/cluster/ec/src/ec-common.c |   25 ++++++++++++-------------
cb8e9e
 1 files changed, 12 insertions(+), 13 deletions(-)
cb8e9e
cb8e9e
diff --git a/xlators/cluster/ec/src/ec-common.c b/xlators/cluster/ec/src/ec-common.c
cb8e9e
index be69c57..f69234e 100644
cb8e9e
--- a/xlators/cluster/ec/src/ec-common.c
cb8e9e
+++ b/xlators/cluster/ec/src/ec-common.c
cb8e9e
@@ -250,7 +250,7 @@ int32_t ec_check_complete(ec_fop_data_t * fop, ec_resume_f resume)
cb8e9e
 
cb8e9e
     GF_ASSERT(fop->resume == NULL);
cb8e9e
 
cb8e9e
-    if (fop->jobs != 0)
cb8e9e
+    if (--fop->jobs != 0)
cb8e9e
     {
cb8e9e
         ec_trace("WAIT", fop, "resume=%p", resume);
cb8e9e
 
cb8e9e
@@ -1259,11 +1259,6 @@ void ec_lock(ec_fop_data_t *fop)
cb8e9e
     ec_lock_link_t *timer_link = NULL;
cb8e9e
     ec_lock_t *lock;
cb8e9e
 
cb8e9e
-    /* There is a chance that ec_resume is called on fop even before ec_sleep.
cb8e9e
-     * Which can result in refs == 0 for fop leading to use after free in this
cb8e9e
-     * function when it calls ec_sleep so do ec_sleep at start and end of this
cb8e9e
-     * function.*/
cb8e9e
-    ec_sleep (fop);
cb8e9e
     while (fop->locked < fop->lock_count) {
cb8e9e
         /* Since there are only up to 2 locks per fop, this xor will change
cb8e9e
          * the order of the locks if fop->first_lock is 1. */
cb8e9e
@@ -1328,7 +1323,6 @@ void ec_lock(ec_fop_data_t *fop)
cb8e9e
             timer_link = NULL;
cb8e9e
         }
cb8e9e
     }
cb8e9e
-    ec_resume (fop, 0);
cb8e9e
 
cb8e9e
     if (timer_link != NULL) {
cb8e9e
         ec_resume(timer_link->fop, 0);
cb8e9e
@@ -1725,15 +1719,9 @@ void ec_unlock(ec_fop_data_t *fop)
cb8e9e
 {
cb8e9e
     int32_t i;
cb8e9e
 
cb8e9e
-    /*If fop->lock_count > 1 then there is a chance that by the time
cb8e9e
-     * ec_unlock_timer_add() is called for the second lock epoll thread could
cb8e9e
-     * reply inodelk and jobs will be '0' leading to completion of state
cb8e9e
-     * machine and freeing of fop. So add sleep/resume*/
cb8e9e
-    ec_sleep (fop);
cb8e9e
     for (i = 0; i < fop->lock_count; i++) {
cb8e9e
         ec_unlock_timer_add(&fop->locks[i]);
cb8e9e
     }
cb8e9e
-    ec_resume (fop, 0);
cb8e9e
 }
cb8e9e
 
cb8e9e
 void
cb8e9e
@@ -1879,6 +1867,17 @@ void __ec_manager(ec_fop_data_t * fop, int32_t error)
cb8e9e
             break;
cb8e9e
         }
cb8e9e
 
cb8e9e
+        /* At each state, fop must not be used anywhere else and there
cb8e9e
+         * shouldn't be any pending subfop going on. */
cb8e9e
+        GF_ASSERT(fop->jobs == 0);
cb8e9e
+
cb8e9e
+        /* While the manager is running we need to avoid that subfops launched
cb8e9e
+         * from it could finish and call ec_resume() before the fop->handler
cb8e9e
+         * has completed. This could lead to the same manager being executed
cb8e9e
+         * by two threads concurrently. ec_check_complete() will take care of
cb8e9e
+         * this reference. */
cb8e9e
+        fop->jobs = 1;
cb8e9e
+
cb8e9e
         fop->state = fop->handler(fop, fop->state);
cb8e9e
 
cb8e9e
         error = ec_check_complete(fop, __ec_manager);
cb8e9e
-- 
cb8e9e
1.7.1
cb8e9e