14f8ab
From 66600fb55522d405a68d7340a5680a2633c4237e Mon Sep 17 00:00:00 2001
14f8ab
From: Xavi Hernandez <xhernandez@redhat.com>
14f8ab
Date: Thu, 30 Apr 2020 11:19:01 +0200
14f8ab
Subject: [PATCH 377/379] syncop: improve scaling and implement more tools
14f8ab
14f8ab
The current scaling of the syncop thread pool is not working properly
14f8ab
and can leave some tasks in the run queue longer than necessary
14f8ab
when the maximum number of threads is not reached.
14f8ab
14f8ab
This patch provides a better scaling condition to react faster to
14f8ab
pending work.
14f8ab
14f8ab
Condition variables and sleep in the context of a synctask have also
14f8ab
been implemented. Their purpose is to replace regular condition
14f8ab
variables and sleeps that block synctask threads and prevent other
14f8ab
tasks from being executed.
14f8ab
14f8ab
The new features have been applied to several places in glusterd.
14f8ab
14f8ab
upstream patch: https://review.gluster.org/#/c/glusterfs/+/24396/
14f8ab
14f8ab
> Change-Id: Ic50b7c73c104f9e41f08101a357d30b95efccfbf
14f8ab
> Fixes: #1116
14f8ab
> Signed-off-by: Xavi Hernandez <xhernandez@redhat.com>
14f8ab
14f8ab
Change-Id: Ic50b7c73c104f9e41f08101a357d30b95efccfbf
14f8ab
BUG: 1810516
14f8ab
Signed-off-by: Sanju Rakonde <srakonde@redhat.com>
14f8ab
Reviewed-on: https://code.engineering.redhat.com/gerrit/200409
14f8ab
Tested-by: Sanju Rakonde <srakonde@redhat.com>
14f8ab
Tested-by: RHGS Build Bot <nigelb@redhat.com>
14f8ab
Reviewed-by: Xavi Hernandez Juan <xhernandez@redhat.com>
14f8ab
---
14f8ab
 libglusterfs/src/glusterfs/syncop.h                |  52 +++-
14f8ab
 libglusterfs/src/libglusterfs.sym                  |   7 +
14f8ab
 libglusterfs/src/syncop.c                          | 306 ++++++++++++++++-----
14f8ab
 xlators/cluster/dht/src/dht-rebalance.c            |   2 +-
14f8ab
 xlators/mgmt/glusterd/src/glusterd-op-sm.c         |   9 +-
14f8ab
 xlators/mgmt/glusterd/src/glusterd-proc-mgmt.c     |   2 +-
14f8ab
 .../mgmt/glusterd/src/glusterd-snapshot-utils.c    |   5 +-
14f8ab
 xlators/mgmt/glusterd/src/glusterd-syncop.h        |   2 +-
14f8ab
 xlators/mgmt/glusterd/src/glusterd-utils.c         |  29 +-
14f8ab
 xlators/mgmt/glusterd/src/glusterd.c               |   2 +
14f8ab
 xlators/mgmt/glusterd/src/glusterd.h               |   2 +
14f8ab
 11 files changed, 317 insertions(+), 101 deletions(-)
14f8ab
14f8ab
diff --git a/libglusterfs/src/glusterfs/syncop.h b/libglusterfs/src/glusterfs/syncop.h
14f8ab
index e0f1017..3011b4c 100644
14f8ab
--- a/libglusterfs/src/glusterfs/syncop.h
14f8ab
+++ b/libglusterfs/src/glusterfs/syncop.h
14f8ab
@@ -15,6 +15,7 @@
14f8ab
 #include <sys/time.h>
14f8ab
 #include <pthread.h>
14f8ab
 #include <ucontext.h>
14f8ab
+#include "glusterfs/timer.h"
14f8ab
 
14f8ab
 #define SYNCENV_PROC_MAX 16
14f8ab
 #define SYNCENV_PROC_MIN 2
14f8ab
@@ -32,6 +33,7 @@
14f8ab
 struct synctask;
14f8ab
 struct syncproc;
14f8ab
 struct syncenv;
14f8ab
+struct synccond;
14f8ab
 
14f8ab
 typedef int (*synctask_cbk_t)(int ret, call_frame_t *frame, void *opaque);
14f8ab
 
14f8ab
@@ -55,9 +57,12 @@ struct synctask {
14f8ab
     call_frame_t *opframe;
14f8ab
     synctask_cbk_t synccbk;
14f8ab
     synctask_fn_t syncfn;
14f8ab
-    synctask_state_t state;
14f8ab
+    struct timespec *delta;
14f8ab
+    gf_timer_t *timer;
14f8ab
+    struct synccond *synccond;
14f8ab
     void *opaque;
14f8ab
     void *stack;
14f8ab
+    synctask_state_t state;
14f8ab
     int woken;
14f8ab
     int slept;
14f8ab
     int ret;
14f8ab
@@ -85,19 +90,21 @@ struct syncproc {
14f8ab
 /* hosts the scheduler thread and framework for executing synctasks */
14f8ab
 struct syncenv {
14f8ab
     struct syncproc proc[SYNCENV_PROC_MAX];
14f8ab
-    int procs;
14f8ab
+
14f8ab
+    pthread_mutex_t mutex;
14f8ab
+    pthread_cond_t cond;
14f8ab
 
14f8ab
     struct list_head runq;
14f8ab
-    int runcount;
14f8ab
     struct list_head waitq;
14f8ab
-    int waitcount;
14f8ab
+
14f8ab
+    int procs;
14f8ab
+    int procs_idle;
14f8ab
+
14f8ab
+    int runcount;
14f8ab
 
14f8ab
     int procmin;
14f8ab
     int procmax;
14f8ab
 
14f8ab
-    pthread_mutex_t mutex;
14f8ab
-    pthread_cond_t cond;
14f8ab
-
14f8ab
     size_t stacksize;
14f8ab
 
14f8ab
     int destroy; /* FLAG to mark syncenv is in destroy mode
14f8ab
@@ -123,6 +130,13 @@ struct synclock {
14f8ab
 };
14f8ab
 typedef struct synclock synclock_t;
14f8ab
 
14f8ab
+struct synccond {
14f8ab
+    pthread_mutex_t pmutex;
14f8ab
+    pthread_cond_t pcond;
14f8ab
+    struct list_head waitq;
14f8ab
+};
14f8ab
+typedef struct synccond synccond_t;
14f8ab
+
14f8ab
 struct syncbarrier {
14f8ab
     gf_boolean_t initialized; /*Set on successful initialization*/
14f8ab
     pthread_mutex_t guard;    /* guard the remaining members, pair @cond */
14f8ab
@@ -219,7 +233,7 @@ struct syncopctx {
14f8ab
 #define __yield(args)                                                          \
14f8ab
     do {                                                                       \
14f8ab
         if (args->task) {                                                      \
14f8ab
-            synctask_yield(args->task);                                        \
14f8ab
+            synctask_yield(args->task, NULL);                                  \
14f8ab
         } else {                                                               \
14f8ab
             pthread_mutex_lock(&args->mutex);                                  \
14f8ab
             {                                                                  \
14f8ab
@@ -307,7 +321,9 @@ synctask_join(struct synctask *task);
14f8ab
 void
14f8ab
 synctask_wake(struct synctask *task);
14f8ab
 void
14f8ab
-synctask_yield(struct synctask *task);
14f8ab
+synctask_yield(struct synctask *task, struct timespec *delta);
14f8ab
+void
14f8ab
+synctask_sleep(int32_t secs);
14f8ab
 void
14f8ab
 synctask_waitfor(struct synctask *task, int count);
14f8ab
 
14f8ab
@@ -405,6 +421,24 @@ synclock_trylock(synclock_t *lock);
14f8ab
 int
14f8ab
 synclock_unlock(synclock_t *lock);
14f8ab
 
14f8ab
+int32_t
14f8ab
+synccond_init(synccond_t *cond);
14f8ab
+
14f8ab
+void
14f8ab
+synccond_destroy(synccond_t *cond);
14f8ab
+
14f8ab
+int
14f8ab
+synccond_wait(synccond_t *cond, synclock_t *lock);
14f8ab
+
14f8ab
+int
14f8ab
+synccond_timedwait(synccond_t *cond, synclock_t *lock, struct timespec *delta);
14f8ab
+
14f8ab
+void
14f8ab
+synccond_signal(synccond_t *cond);
14f8ab
+
14f8ab
+void
14f8ab
+synccond_broadcast(synccond_t *cond);
14f8ab
+
14f8ab
 int
14f8ab
 syncbarrier_init(syncbarrier_t *barrier);
14f8ab
 int
14f8ab
diff --git a/libglusterfs/src/libglusterfs.sym b/libglusterfs/src/libglusterfs.sym
14f8ab
index 467a1b7..5a721e0 100644
14f8ab
--- a/libglusterfs/src/libglusterfs.sym
14f8ab
+++ b/libglusterfs/src/libglusterfs.sym
14f8ab
@@ -938,6 +938,12 @@ syncbarrier_destroy
14f8ab
 syncbarrier_init
14f8ab
 syncbarrier_wait
14f8ab
 syncbarrier_wake
14f8ab
+synccond_init
14f8ab
+synccond_destroy
14f8ab
+synccond_wait
14f8ab
+synccond_timedwait
14f8ab
+synccond_signal
14f8ab
+synccond_broadcast
14f8ab
 syncenv_destroy
14f8ab
 syncenv_new
14f8ab
 synclock_destroy
14f8ab
@@ -1015,6 +1021,7 @@ synctask_new
14f8ab
 synctask_new1
14f8ab
 synctask_set
14f8ab
 synctask_setid
14f8ab
+synctask_sleep
14f8ab
 synctask_wake
14f8ab
 synctask_yield
14f8ab
 sys_access
14f8ab
diff --git a/libglusterfs/src/syncop.c b/libglusterfs/src/syncop.c
14f8ab
index 693970f..71d37b7 100644
14f8ab
--- a/libglusterfs/src/syncop.c
14f8ab
+++ b/libglusterfs/src/syncop.c
14f8ab
@@ -154,10 +154,14 @@ out:
14f8ab
     return ret;
14f8ab
 }
14f8ab
 
14f8ab
+void *
14f8ab
+syncenv_processor(void *thdata);
14f8ab
+
14f8ab
 static void
14f8ab
 __run(struct synctask *task)
14f8ab
 {
14f8ab
     struct syncenv *env = NULL;
14f8ab
+    int32_t total, ret, i;
14f8ab
 
14f8ab
     env = task->env;
14f8ab
 
14f8ab
@@ -173,7 +177,6 @@ __run(struct synctask *task)
14f8ab
             env->runcount--;
14f8ab
             break;
14f8ab
         case SYNCTASK_WAIT:
14f8ab
-            env->waitcount--;
14f8ab
             break;
14f8ab
         case SYNCTASK_DONE:
14f8ab
             gf_msg(task->xl->name, GF_LOG_WARNING, 0, LG_MSG_COMPLETED_TASK,
14f8ab
@@ -187,8 +190,27 @@ __run(struct synctask *task)
14f8ab
     }
14f8ab
 
14f8ab
     list_add_tail(&task->all_tasks, &env->runq);
14f8ab
-    env->runcount++;
14f8ab
     task->state = SYNCTASK_RUN;
14f8ab
+
14f8ab
+    env->runcount++;
14f8ab
+
14f8ab
+    total = env->procs + env->runcount - env->procs_idle;
14f8ab
+    if (total > env->procmax) {
14f8ab
+        total = env->procmax;
14f8ab
+    }
14f8ab
+    if (total > env->procs) {
14f8ab
+        for (i = 0; i < env->procmax; i++) {
14f8ab
+            if (env->proc[i].env == NULL) {
14f8ab
+                env->proc[i].env = env;
14f8ab
+                ret = gf_thread_create(&env->proc[i].processor, NULL,
14f8ab
+                                       syncenv_processor, &env->proc[i],
14f8ab
+                                       "sproc%d", i);
14f8ab
+                if ((ret < 0) || (++env->procs >= total)) {
14f8ab
+                    break;
14f8ab
+                }
14f8ab
+            }
14f8ab
+        }
14f8ab
+    }
14f8ab
 }
14f8ab
 
14f8ab
 static void
14f8ab
@@ -210,7 +232,6 @@ __wait(struct synctask *task)
14f8ab
             gf_msg(task->xl->name, GF_LOG_WARNING, 0, LG_MSG_REWAITING_TASK,
14f8ab
                    "re-waiting already waiting "
14f8ab
                    "task");
14f8ab
-            env->waitcount--;
14f8ab
             break;
14f8ab
         case SYNCTASK_DONE:
14f8ab
             gf_msg(task->xl->name, GF_LOG_WARNING, 0, LG_MSG_COMPLETED_TASK,
14f8ab
@@ -223,12 +244,11 @@ __wait(struct synctask *task)
14f8ab
     }
14f8ab
 
14f8ab
     list_add_tail(&task->all_tasks, &env->waitq);
14f8ab
-    env->waitcount++;
14f8ab
     task->state = SYNCTASK_WAIT;
14f8ab
 }
14f8ab
 
14f8ab
 void
14f8ab
-synctask_yield(struct synctask *task)
14f8ab
+synctask_yield(struct synctask *task, struct timespec *delta)
14f8ab
 {
14f8ab
     xlator_t *oldTHIS = THIS;
14f8ab
 
14f8ab
@@ -237,6 +257,8 @@ synctask_yield(struct synctask *task)
14f8ab
     task->proc->sched.uc_flags &= ~_UC_TLSBASE;
14f8ab
 #endif
14f8ab
 
14f8ab
+    task->delta = delta;
14f8ab
+
14f8ab
     if (task->state != SYNCTASK_DONE) {
14f8ab
         task->state = SYNCTASK_SUSPEND;
14f8ab
     }
14f8ab
@@ -249,6 +271,35 @@ synctask_yield(struct synctask *task)
14f8ab
 }
14f8ab
 
14f8ab
 void
14f8ab
+synctask_sleep(int32_t secs)
14f8ab
+{
14f8ab
+    struct timespec delta;
14f8ab
+    struct synctask *task;
14f8ab
+
14f8ab
+    task = synctask_get();
14f8ab
+
14f8ab
+    if (task == NULL) {
14f8ab
+        sleep(secs);
14f8ab
+    } else {
14f8ab
+        delta.tv_sec = secs;
14f8ab
+        delta.tv_nsec = 0;
14f8ab
+
14f8ab
+        synctask_yield(task, &delta);
14f8ab
+    }
14f8ab
+}
14f8ab
+
14f8ab
+static void
14f8ab
+__synctask_wake(struct synctask *task)
14f8ab
+{
14f8ab
+    task->woken = 1;
14f8ab
+
14f8ab
+    if (task->slept)
14f8ab
+        __run(task);
14f8ab
+
14f8ab
+    pthread_cond_broadcast(&task->env->cond);
14f8ab
+}
14f8ab
+
14f8ab
+void
14f8ab
 synctask_wake(struct synctask *task)
14f8ab
 {
14f8ab
     struct syncenv *env = NULL;
14f8ab
@@ -257,13 +308,18 @@ synctask_wake(struct synctask *task)
14f8ab
 
14f8ab
     pthread_mutex_lock(&env->mutex);
14f8ab
     {
14f8ab
-        task->woken = 1;
14f8ab
+        if (task->timer != NULL) {
14f8ab
+            if (gf_timer_call_cancel(task->xl->ctx, task->timer) != 0) {
14f8ab
+                goto unlock;
14f8ab
+            }
14f8ab
 
14f8ab
-        if (task->slept)
14f8ab
-            __run(task);
14f8ab
+            task->timer = NULL;
14f8ab
+            task->synccond = NULL;
14f8ab
+        }
14f8ab
 
14f8ab
-        pthread_cond_broadcast(&env->cond);
14f8ab
+        __synctask_wake(task);
14f8ab
     }
14f8ab
+unlock:
14f8ab
     pthread_mutex_unlock(&env->mutex);
14f8ab
 }
14f8ab
 
14f8ab
@@ -282,7 +338,7 @@ synctask_wrap(void)
14f8ab
 
14f8ab
     task->state = SYNCTASK_DONE;
14f8ab
 
14f8ab
-    synctask_yield(task);
14f8ab
+    synctask_yield(task, NULL);
14f8ab
 }
14f8ab
 
14f8ab
 void
14f8ab
@@ -422,11 +478,6 @@ synctask_create(struct syncenv *env, size_t stacksize, synctask_fn_t fn,
14f8ab
     }
14f8ab
 
14f8ab
     synctask_wake(newtask);
14f8ab
-    /*
14f8ab
-     * Make sure someone's there to execute anything we just put on the
14f8ab
-     * run queue.
14f8ab
-     */
14f8ab
-    syncenv_scale(env);
14f8ab
 
14f8ab
     return newtask;
14f8ab
 err:
14f8ab
@@ -520,8 +571,12 @@ syncenv_task(struct syncproc *proc)
14f8ab
                 goto unlock;
14f8ab
             }
14f8ab
 
14f8ab
+            env->procs_idle++;
14f8ab
+
14f8ab
             sleep_till.tv_sec = time(NULL) + SYNCPROC_IDLE_TIME;
14f8ab
             ret = pthread_cond_timedwait(&env->cond, &env->mutex, &sleep_till);
14f8ab
+
14f8ab
+            env->procs_idle--;
14f8ab
         }
14f8ab
 
14f8ab
         task = list_entry(env->runq.next, struct synctask, all_tasks);
14f8ab
@@ -540,6 +595,34 @@ unlock:
14f8ab
     return task;
14f8ab
 }
14f8ab
 
14f8ab
+static void
14f8ab
+synctask_timer(void *data)
14f8ab
+{
14f8ab
+    struct synctask *task = data;
14f8ab
+    struct synccond *cond;
14f8ab
+
14f8ab
+    cond = task->synccond;
14f8ab
+    if (cond != NULL) {
14f8ab
+        pthread_mutex_lock(&cond->pmutex);
14f8ab
+
14f8ab
+        list_del_init(&task->waitq);
14f8ab
+        task->synccond = NULL;
14f8ab
+
14f8ab
+        pthread_mutex_unlock(&cond->pmutex);
14f8ab
+
14f8ab
+        task->ret = -ETIMEDOUT;
14f8ab
+    }
14f8ab
+
14f8ab
+    pthread_mutex_lock(&task->env->mutex);
14f8ab
+
14f8ab
+    gf_timer_call_cancel(task->xl->ctx, task->timer);
14f8ab
+    task->timer = NULL;
14f8ab
+
14f8ab
+    __synctask_wake(task);
14f8ab
+
14f8ab
+    pthread_mutex_unlock(&task->env->mutex);
14f8ab
+}
14f8ab
+
14f8ab
 void
14f8ab
 synctask_switchto(struct synctask *task)
14f8ab
 {
14f8ab
@@ -572,7 +655,14 @@ synctask_switchto(struct synctask *task)
14f8ab
         } else {
14f8ab
             task->slept = 1;
14f8ab
             __wait(task);
14f8ab
+
14f8ab
+            if (task->delta != NULL) {
14f8ab
+                task->timer = gf_timer_call_after(task->xl->ctx, *task->delta,
14f8ab
+                                                  synctask_timer, task);
14f8ab
+            }
14f8ab
         }
14f8ab
+
14f8ab
+        task->delta = NULL;
14f8ab
     }
14f8ab
     pthread_mutex_unlock(&env->mutex);
14f8ab
 }
14f8ab
@@ -580,65 +670,18 @@ synctask_switchto(struct synctask *task)
14f8ab
 void *
14f8ab
 syncenv_processor(void *thdata)
14f8ab
 {
14f8ab
-    struct syncenv *env = NULL;
14f8ab
     struct syncproc *proc = NULL;
14f8ab
     struct synctask *task = NULL;
14f8ab
 
14f8ab
     proc = thdata;
14f8ab
-    env = proc->env;
14f8ab
-
14f8ab
-    for (;;) {
14f8ab
-        task = syncenv_task(proc);
14f8ab
-        if (!task)
14f8ab
-            break;
14f8ab
 
14f8ab
+    while ((task = syncenv_task(proc)) != NULL) {
14f8ab
         synctask_switchto(task);
14f8ab
-
14f8ab
-        syncenv_scale(env);
14f8ab
     }
14f8ab
 
14f8ab
     return NULL;
14f8ab
 }
14f8ab
 
14f8ab
-void
14f8ab
-syncenv_scale(struct syncenv *env)
14f8ab
-{
14f8ab
-    int diff = 0;
14f8ab
-    int scale = 0;
14f8ab
-    int i = 0;
14f8ab
-    int ret = 0;
14f8ab
-
14f8ab
-    pthread_mutex_lock(&env->mutex);
14f8ab
-    {
14f8ab
-        if (env->procs > env->runcount)
14f8ab
-            goto unlock;
14f8ab
-
14f8ab
-        scale = env->runcount;
14f8ab
-        if (scale > env->procmax)
14f8ab
-            scale = env->procmax;
14f8ab
-        if (scale > env->procs)
14f8ab
-            diff = scale - env->procs;
14f8ab
-        while (diff) {
14f8ab
-            diff--;
14f8ab
-            for (; (i < env->procmax); i++) {
14f8ab
-                if (env->proc[i].processor == 0)
14f8ab
-                    break;
14f8ab
-            }
14f8ab
-
14f8ab
-            env->proc[i].env = env;
14f8ab
-            ret = gf_thread_create(&env->proc[i].processor, NULL,
14f8ab
-                                   syncenv_processor, &env->proc[i],
14f8ab
-                                   "sproc%03hx", env->procs & 0x3ff);
14f8ab
-            if (ret)
14f8ab
-                break;
14f8ab
-            env->procs++;
14f8ab
-            i++;
14f8ab
-        }
14f8ab
-    }
14f8ab
-unlock:
14f8ab
-    pthread_mutex_unlock(&env->mutex);
14f8ab
-}
14f8ab
-
14f8ab
 /* The syncenv threads are cleaned up in this routine.
14f8ab
  */
14f8ab
 void
14f8ab
@@ -715,12 +758,13 @@ syncenv_new(size_t stacksize, int procmin, int procmax)
14f8ab
         newenv->stacksize = stacksize;
14f8ab
     newenv->procmin = procmin;
14f8ab
     newenv->procmax = procmax;
14f8ab
+    newenv->procs_idle = 0;
14f8ab
 
14f8ab
     for (i = 0; i < newenv->procmin; i++) {
14f8ab
         newenv->proc[i].env = newenv;
14f8ab
         ret = gf_thread_create(&newenv->proc[i].processor, NULL,
14f8ab
                                syncenv_processor, &newenv->proc[i], "sproc%d",
14f8ab
-                               newenv->procs);
14f8ab
+                               i);
14f8ab
         if (ret)
14f8ab
             break;
14f8ab
         newenv->procs++;
14f8ab
@@ -810,7 +854,7 @@ __synclock_lock(struct synclock *lock)
14f8ab
             task->woken = 0;
14f8ab
             list_add_tail(&task->waitq, &lock->waitq);
14f8ab
             pthread_mutex_unlock(&lock->guard);
14f8ab
-            synctask_yield(task);
14f8ab
+            synctask_yield(task, NULL);
14f8ab
             /* task is removed from waitq in unlock,
14f8ab
              * under lock->guard.*/
14f8ab
             pthread_mutex_lock(&lock->guard);
14f8ab
@@ -963,6 +1007,136 @@ synclock_unlock(synclock_t *lock)
14f8ab
     return ret;
14f8ab
 }
14f8ab
 
14f8ab
+/* Condition variables */
14f8ab
+
14f8ab
+int32_t
14f8ab
+synccond_init(synccond_t *cond)
14f8ab
+{
14f8ab
+    int32_t ret;
14f8ab
+
14f8ab
+    INIT_LIST_HEAD(&cond->waitq);
14f8ab
+
14f8ab
+    ret = pthread_mutex_init(&cond->pmutex, NULL);
14f8ab
+    if (ret != 0) {
14f8ab
+        return -ret;
14f8ab
+    }
14f8ab
+
14f8ab
+    ret = pthread_cond_init(&cond->pcond, NULL);
14f8ab
+    if (ret != 0) {
14f8ab
+        pthread_mutex_destroy(&cond->pmutex);
14f8ab
+    }
14f8ab
+
14f8ab
+    return -ret;
14f8ab
+}
14f8ab
+
14f8ab
+void
14f8ab
+synccond_destroy(synccond_t *cond)
14f8ab
+{
14f8ab
+    pthread_cond_destroy(&cond->pcond);
14f8ab
+    pthread_mutex_destroy(&cond->pmutex);
14f8ab
+}
14f8ab
+
14f8ab
+int
14f8ab
+synccond_timedwait(synccond_t *cond, synclock_t *lock, struct timespec *delta)
14f8ab
+{
14f8ab
+    struct timespec now;
14f8ab
+    struct synctask *task = NULL;
14f8ab
+    int ret;
14f8ab
+
14f8ab
+    task = synctask_get();
14f8ab
+
14f8ab
+    if (task == NULL) {
14f8ab
+        if (delta != NULL) {
14f8ab
+            timespec_now_realtime(&now);
14f8ab
+            timespec_adjust_delta(&now, *delta);
14f8ab
+        }
14f8ab
+
14f8ab
+        pthread_mutex_lock(&cond->pmutex);
14f8ab
+
14f8ab
+        if (delta == NULL) {
14f8ab
+            ret = -pthread_cond_wait(&cond->pcond, &cond->pmutex);
14f8ab
+        } else {
14f8ab
+            ret = -pthread_cond_timedwait(&cond->pcond, &cond->pmutex, &now);
14f8ab
+        }
14f8ab
+    } else {
14f8ab
+        pthread_mutex_lock(&cond->pmutex);
14f8ab
+
14f8ab
+        list_add_tail(&task->waitq, &cond->waitq);
14f8ab
+        task->synccond = cond;
14f8ab
+
14f8ab
+        ret = synclock_unlock(lock);
14f8ab
+        if (ret == 0) {
14f8ab
+            pthread_mutex_unlock(&cond->pmutex);
14f8ab
+
14f8ab
+            synctask_yield(task, delta);
14f8ab
+
14f8ab
+            ret = synclock_lock(lock);
14f8ab
+            if (ret == 0) {
14f8ab
+                ret = task->ret;
14f8ab
+            }
14f8ab
+            task->ret = 0;
14f8ab
+
14f8ab
+            return ret;
14f8ab
+        }
14f8ab
+
14f8ab
+        list_del_init(&task->waitq);
14f8ab
+    }
14f8ab
+
14f8ab
+    pthread_mutex_unlock(&cond->pmutex);
14f8ab
+
14f8ab
+    return ret;
14f8ab
+}
14f8ab
+
14f8ab
+int
14f8ab
+synccond_wait(synccond_t *cond, synclock_t *lock)
14f8ab
+{
14f8ab
+    return synccond_timedwait(cond, lock, NULL);
14f8ab
+}
14f8ab
+
14f8ab
+void
14f8ab
+synccond_signal(synccond_t *cond)
14f8ab
+{
14f8ab
+    struct synctask *task;
14f8ab
+
14f8ab
+    pthread_mutex_lock(&cond->pmutex);
14f8ab
+
14f8ab
+    if (!list_empty(&cond->waitq)) {
14f8ab
+        task = list_first_entry(&cond->waitq, struct synctask, waitq);
14f8ab
+        list_del_init(&task->waitq);
14f8ab
+
14f8ab
+        pthread_mutex_unlock(&cond->pmutex);
14f8ab
+
14f8ab
+        synctask_wake(task);
14f8ab
+    } else {
14f8ab
+        pthread_cond_signal(&cond->pcond);
14f8ab
+
14f8ab
+        pthread_mutex_unlock(&cond->pmutex);
14f8ab
+    }
14f8ab
+}
14f8ab
+
14f8ab
+void
14f8ab
+synccond_broadcast(synccond_t *cond)
14f8ab
+{
14f8ab
+    struct list_head list;
14f8ab
+    struct synctask *task;
14f8ab
+
14f8ab
+    INIT_LIST_HEAD(&list);
14f8ab
+
14f8ab
+    pthread_mutex_lock(&cond->pmutex);
14f8ab
+
14f8ab
+    list_splice_init(&cond->waitq, &list);
14f8ab
+    pthread_cond_broadcast(&cond->pcond);
14f8ab
+
14f8ab
+    pthread_mutex_unlock(&cond->pmutex);
14f8ab
+
14f8ab
+    while (!list_empty(&list)) {
14f8ab
+        task = list_first_entry(&list, struct synctask, waitq);
14f8ab
+        list_del_init(&task->waitq);
14f8ab
+
14f8ab
+        synctask_wake(task);
14f8ab
+    }
14f8ab
+}
14f8ab
+
14f8ab
 /* Barriers */
14f8ab
 
14f8ab
 int
14f8ab
@@ -1032,7 +1206,7 @@ __syncbarrier_wait(struct syncbarrier *barrier, int waitfor)
14f8ab
             /* called within a synctask */
14f8ab
             list_add_tail(&task->waitq, &barrier->waitq);
14f8ab
             pthread_mutex_unlock(&barrier->guard);
14f8ab
-            synctask_yield(task);
14f8ab
+            synctask_yield(task, NULL);
14f8ab
             pthread_mutex_lock(&barrier->guard);
14f8ab
         } else {
14f8ab
             /* called by a non-synctask */
14f8ab
diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c
14f8ab
index c692119..957deaa 100644
14f8ab
--- a/xlators/cluster/dht/src/dht-rebalance.c
14f8ab
+++ b/xlators/cluster/dht/src/dht-rebalance.c
14f8ab
@@ -5224,7 +5224,7 @@ gf_defrag_pause_tier(xlator_t *this, gf_defrag_info_t *defrag)
14f8ab
     defrag->tier_conf.pause_timer = gf_timer_call_after(
14f8ab
         this->ctx, delta, gf_defrag_pause_tier_timeout, this);
14f8ab
 
14f8ab
-    synctask_yield(defrag->tier_conf.pause_synctask);
14f8ab
+    synctask_yield(defrag->tier_conf.pause_synctask, NULL);
14f8ab
 
14f8ab
     if (gf_defrag_get_pause_state(&defrag->tier_conf) == TIER_PAUSED)
14f8ab
         goto out;
14f8ab
diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.c b/xlators/mgmt/glusterd/src/glusterd-op-sm.c
14f8ab
index 0d29de2..6475611 100644
14f8ab
--- a/xlators/mgmt/glusterd/src/glusterd-op-sm.c
14f8ab
+++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.c
14f8ab
@@ -6076,13 +6076,8 @@ glusterd_op_stage_validate(glusterd_op_t op, dict_t *dict, char **op_errstr,
14f8ab
 static void
14f8ab
 glusterd_wait_for_blockers(glusterd_conf_t *priv)
14f8ab
 {
14f8ab
-    uint64_t blockers = GF_ATOMIC_GET(priv->blockers);
14f8ab
-
14f8ab
-    while (blockers) {
14f8ab
-        synclock_unlock(&priv->big_lock);
14f8ab
-        sleep(1);
14f8ab
-        blockers = GF_ATOMIC_GET(priv->blockers);
14f8ab
-        synclock_lock(&priv->big_lock);
14f8ab
+    while (GF_ATOMIC_GET(priv->blockers)) {
14f8ab
+        synccond_wait(&priv->cond_blockers, &priv->big_lock);
14f8ab
     }
14f8ab
 }
14f8ab
 
14f8ab
diff --git a/xlators/mgmt/glusterd/src/glusterd-proc-mgmt.c b/xlators/mgmt/glusterd/src/glusterd-proc-mgmt.c
14f8ab
index 36018a0..f55a5fd 100644
14f8ab
--- a/xlators/mgmt/glusterd/src/glusterd-proc-mgmt.c
14f8ab
+++ b/xlators/mgmt/glusterd/src/glusterd-proc-mgmt.c
14f8ab
@@ -112,7 +112,7 @@ glusterd_proc_stop(glusterd_proc_t *proc, int sig, int flags)
14f8ab
         goto out;
14f8ab
 
14f8ab
     synclock_unlock(&conf->big_lock);
14f8ab
-    sleep(1);
14f8ab
+    synctask_sleep(1);
14f8ab
     synclock_lock(&conf->big_lock);
14f8ab
     if (gf_is_service_running(proc->pidfile, &pid)) {
14f8ab
         ret = kill(pid, SIGKILL);
14f8ab
diff --git a/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c b/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c
14f8ab
index d225854..386eed2 100644
14f8ab
--- a/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c
14f8ab
+++ b/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c
14f8ab
@@ -1961,9 +1961,7 @@ glusterd_update_snaps_synctask(void *opaque)
14f8ab
     synclock_lock(&conf->big_lock);
14f8ab
 
14f8ab
     while (conf->restart_bricks) {
14f8ab
-        synclock_unlock(&conf->big_lock);
14f8ab
-        sleep(2);
14f8ab
-        synclock_lock(&conf->big_lock);
14f8ab
+        synccond_wait(&conf->cond_restart_bricks, &conf->big_lock);
14f8ab
     }
14f8ab
     conf->restart_bricks = _gf_true;
14f8ab
 
14f8ab
@@ -2070,6 +2068,7 @@ out:
14f8ab
     if (dict)
14f8ab
         dict_unref(dict);
14f8ab
     conf->restart_bricks = _gf_false;
14f8ab
+    synccond_broadcast(&conf->cond_restart_bricks);
14f8ab
 
14f8ab
     return ret;
14f8ab
 }
14f8ab
diff --git a/xlators/mgmt/glusterd/src/glusterd-syncop.h b/xlators/mgmt/glusterd/src/glusterd-syncop.h
14f8ab
index ce4a940..a265f21 100644
14f8ab
--- a/xlators/mgmt/glusterd/src/glusterd-syncop.h
14f8ab
+++ b/xlators/mgmt/glusterd/src/glusterd-syncop.h
14f8ab
@@ -32,7 +32,7 @@
14f8ab
         ret = gd_syncop_submit_request(rpc, req, stb, cookie, prog, procnum,   \
14f8ab
                                        cbk, (xdrproc_t)xdrproc);               \
14f8ab
         if (!ret)                                                              \
14f8ab
-            synctask_yield(stb->task);                                         \
14f8ab
+            synctask_yield(stb->task, NULL);                                   \
14f8ab
         else                                                                   \
14f8ab
             gf_asprintf(&stb->errstr,                                          \
14f8ab
                         "%s failed. Check log file"                            \
14f8ab
diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c
14f8ab
index 812c698..ce9931c 100644
14f8ab
--- a/xlators/mgmt/glusterd/src/glusterd-utils.c
14f8ab
+++ b/xlators/mgmt/glusterd/src/glusterd-utils.c
14f8ab
@@ -5068,22 +5068,22 @@ glusterd_import_friend_volumes_synctask(void *opaque)
14f8ab
      * restarted (refer glusterd_restart_bricks ())
14f8ab
      */
14f8ab
     while (conf->restart_bricks) {
14f8ab
-        synclock_unlock(&conf->big_lock);
14f8ab
-        sleep(2);
14f8ab
-        synclock_lock(&conf->big_lock);
14f8ab
+        synccond_wait(&conf->cond_restart_bricks, &conf->big_lock);
14f8ab
     }
14f8ab
     conf->restart_bricks = _gf_true;
14f8ab
 
14f8ab
     while (i <= count) {
14f8ab
         ret = glusterd_import_friend_volume(peer_data, i);
14f8ab
         if (ret) {
14f8ab
-            conf->restart_bricks = _gf_false;
14f8ab
-            goto out;
14f8ab
+            break;
14f8ab
         }
14f8ab
         i++;
14f8ab
     }
14f8ab
-    glusterd_svcs_manager(NULL);
14f8ab
+    if (i > count) {
14f8ab
+        glusterd_svcs_manager(NULL);
14f8ab
+    }
14f8ab
     conf->restart_bricks = _gf_false;
14f8ab
+    synccond_broadcast(&conf->cond_restart_bricks);
14f8ab
 out:
14f8ab
     if (peer_data)
14f8ab
         dict_unref(peer_data);
14f8ab
@@ -5769,7 +5769,9 @@ my_callback(struct rpc_req *req, struct iovec *iov, int count, void *v_frame)
14f8ab
     call_frame_t *frame = v_frame;
14f8ab
     glusterd_conf_t *conf = frame->this->private;
14f8ab
 
14f8ab
-    GF_ATOMIC_DEC(conf->blockers);
14f8ab
+    if (GF_ATOMIC_DEC(conf->blockers) == 0) {
14f8ab
+        synccond_broadcast(&conf->cond_blockers);
14f8ab
+    }
14f8ab
 
14f8ab
     STACK_DESTROY(frame->root);
14f8ab
     return 0;
14f8ab
@@ -5865,7 +5867,9 @@ attach_brick_callback(struct rpc_req *req, struct iovec *iov, int count,
14f8ab
         }
14f8ab
     }
14f8ab
 out:
14f8ab
-    GF_ATOMIC_DEC(conf->blockers);
14f8ab
+    if (GF_ATOMIC_DEC(conf->blockers) == 0) {
14f8ab
+        synccond_broadcast(&conf->cond_blockers);
14f8ab
+    }
14f8ab
     STACK_DESTROY(frame->root);
14f8ab
     return 0;
14f8ab
 }
14f8ab
@@ -6053,7 +6057,7 @@ attach_brick(xlator_t *this, glusterd_brickinfo_t *brickinfo,
14f8ab
          * TBD: see if there's a better way
14f8ab
          */
14f8ab
         synclock_unlock(&conf->big_lock);
14f8ab
-        sleep(1);
14f8ab
+        synctask_sleep(1);
14f8ab
         synclock_lock(&conf->big_lock);
14f8ab
     }
14f8ab
 
14f8ab
@@ -6193,7 +6197,7 @@ find_compat_brick_in_vol(glusterd_conf_t *conf,
14f8ab
                          "brick %s is still"
14f8ab
                          " starting, waiting for 2 seconds ",
14f8ab
                          other_brick->path);
14f8ab
-            sleep(2);
14f8ab
+            synctask_sleep(2);
14f8ab
             synclock_lock(&conf->big_lock);
14f8ab
             retries--;
14f8ab
         }
14f8ab
@@ -6680,9 +6684,7 @@ glusterd_restart_bricks(void *opaque)
14f8ab
      * glusterd_compare_friend_data ())
14f8ab
      */
14f8ab
     while (conf->restart_bricks) {
14f8ab
-        synclock_unlock(&conf->big_lock);
14f8ab
-        sleep(2);
14f8ab
-        synclock_lock(&conf->big_lock);
14f8ab
+        synccond_wait(&conf->cond_restart_bricks, &conf->big_lock);
14f8ab
     }
14f8ab
     conf->restart_bricks = _gf_true;
14f8ab
 
14f8ab
@@ -6798,6 +6800,7 @@ out:
14f8ab
     GF_ATOMIC_DEC(conf->blockers);
14f8ab
     conf->restart_done = _gf_true;
14f8ab
     conf->restart_bricks = _gf_false;
14f8ab
+    synccond_broadcast(&conf->cond_restart_bricks);
14f8ab
 
14f8ab
 return_block:
14f8ab
     return ret;
14f8ab
diff --git a/xlators/mgmt/glusterd/src/glusterd.c b/xlators/mgmt/glusterd/src/glusterd.c
14f8ab
index d360312..a01034a 100644
14f8ab
--- a/xlators/mgmt/glusterd/src/glusterd.c
14f8ab
+++ b/xlators/mgmt/glusterd/src/glusterd.c
14f8ab
@@ -1845,6 +1845,8 @@ init(xlator_t *this)
14f8ab
     (void)strncpy(conf->rundir, rundir, sizeof(conf->rundir));
14f8ab
 
14f8ab
     synclock_init(&conf->big_lock, SYNC_LOCK_RECURSIVE);
14f8ab
+    synccond_init(&conf->cond_restart_bricks);
14f8ab
+    synccond_init(&conf->cond_blockers);
14f8ab
     pthread_mutex_init(&conf->xprt_lock, NULL);
14f8ab
     INIT_LIST_HEAD(&conf->xprt_list);
14f8ab
     pthread_mutex_init(&conf->import_volumes, NULL);
14f8ab
diff --git a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h
14f8ab
index 2be005c..1c6c3b1 100644
14f8ab
--- a/xlators/mgmt/glusterd/src/glusterd.h
14f8ab
+++ b/xlators/mgmt/glusterd/src/glusterd.h
14f8ab
@@ -209,6 +209,8 @@ typedef struct {
14f8ab
     dict_t *opts;
14f8ab
     synclock_t big_lock;
14f8ab
     gf_boolean_t restart_done;
14f8ab
+    synccond_t cond_restart_bricks;
14f8ab
+    synccond_t cond_blockers;
14f8ab
     rpcsvc_t *uds_rpc; /* RPCSVC for the unix domain socket */
14f8ab
     uint32_t base_port;
14f8ab
     uint32_t max_port;
14f8ab
-- 
14f8ab
1.8.3.1
14f8ab