26ba25
From 6acc1d617c7db4c575dc56c6035ea00315dffa20 Mon Sep 17 00:00:00 2001
26ba25
From: Kevin Wolf <kwolf@redhat.com>
26ba25
Date: Wed, 10 Oct 2018 20:22:12 +0100
26ba25
Subject: [PATCH 46/49] block: Use a single global AioWait
26ba25
26ba25
RH-Author: Kevin Wolf <kwolf@redhat.com>
26ba25
Message-id: <20181010202213.7372-34-kwolf@redhat.com>
26ba25
Patchwork-id: 82623
26ba25
O-Subject: [RHEL-8 qemu-kvm PATCH 43/44] block: Use a single global AioWait
26ba25
Bugzilla: 1637976
26ba25
RH-Acked-by: Max Reitz <mreitz@redhat.com>
26ba25
RH-Acked-by: John Snow <jsnow@redhat.com>
26ba25
RH-Acked-by: Thomas Huth <thuth@redhat.com>
26ba25
26ba25
When draining a block node, we recurse to its parent and for subtree
26ba25
drains also to its children. A single AIO_WAIT_WHILE() is then used to
26ba25
wait for bdrv_drain_poll() to become true, which depends on all of the
26ba25
nodes we recursed to. However, if the respective child or parent becomes
26ba25
quiescent and calls bdrv_wakeup(), only the AioWait of the child/parent
26ba25
is checked, while AIO_WAIT_WHILE() depends on the AioWait of the
26ba25
original node.
26ba25
26ba25
Fix this by using a single AioWait for all callers of AIO_WAIT_WHILE().
26ba25
26ba25
This may mean that the draining thread gets a few more unnecessary
26ba25
wakeups because an unrelated operation got completed, but we already
26ba25
wake it up when something _could_ have changed rather than only if it
26ba25
has certainly changed.
26ba25
26ba25
Apart from that, drain is a slow path anyway. In theory it would be
26ba25
possible to use wakeups more selectively and still correctly, but the
26ba25
gains are likely not worth the additional complexity. In fact, this
26ba25
patch is a nice simplification for some places in the code.
26ba25
26ba25
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
26ba25
Reviewed-by: Eric Blake <eblake@redhat.com>
26ba25
Reviewed-by: Max Reitz <mreitz@redhat.com>
26ba25
(cherry picked from commit cfe29d8294e06420e15d4938421ae006c8ac49e7)
26ba25
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
26ba25
Signed-off-by: Danilo C. L. de Paula <ddepaula@redhat.com>
26ba25
---
26ba25
 block.c                   |  5 -----
26ba25
 block/block-backend.c     | 11 ++++-------
26ba25
 block/io.c                |  7 ++-----
26ba25
 blockjob.c                | 13 +------------
26ba25
 include/block/aio-wait.h  | 22 +++++++++++-----------
26ba25
 include/block/block.h     |  6 +-----
26ba25
 include/block/block_int.h |  3 ---
26ba25
 include/block/blockjob.h  | 10 ----------
26ba25
 job.c                     |  3 +--
26ba25
 util/aio-wait.c           | 11 ++++++-----
26ba25
 10 files changed, 26 insertions(+), 65 deletions(-)
26ba25
26ba25
diff --git a/block.c b/block.c
26ba25
index e89b5e3..fbd569c 100644
26ba25
--- a/block.c
26ba25
+++ b/block.c
26ba25
@@ -4847,11 +4847,6 @@ AioContext *bdrv_get_aio_context(BlockDriverState *bs)
26ba25
     return bs ? bs->aio_context : qemu_get_aio_context();
26ba25
 }
26ba25
 
26ba25
-AioWait *bdrv_get_aio_wait(BlockDriverState *bs)
26ba25
-{
26ba25
-    return bs ? &bs->wait : NULL;
26ba25
-}
26ba25
-
26ba25
 void bdrv_coroutine_enter(BlockDriverState *bs, Coroutine *co)
26ba25
 {
26ba25
     aio_co_enter(bdrv_get_aio_context(bs), co);
26ba25
diff --git a/block/block-backend.c b/block/block-backend.c
26ba25
index b8ea286..91abfe6 100644
26ba25
--- a/block/block-backend.c
26ba25
+++ b/block/block-backend.c
26ba25
@@ -88,7 +88,6 @@ struct BlockBackend {
26ba25
      * Accessed with atomic ops.
26ba25
      */
26ba25
     unsigned int in_flight;
26ba25
-    AioWait wait;
26ba25
 };
26ba25
 
26ba25
 typedef struct BlockBackendAIOCB {
26ba25
@@ -1300,7 +1299,7 @@ static void blk_inc_in_flight(BlockBackend *blk)
26ba25
 static void blk_dec_in_flight(BlockBackend *blk)
26ba25
 {
26ba25
     atomic_dec(&blk->in_flight);
26ba25
-    aio_wait_kick(&blk->wait);
26ba25
+    aio_wait_kick();
26ba25
 }
26ba25
 
26ba25
 static void error_callback_bh(void *opaque)
26ba25
@@ -1601,9 +1600,8 @@ void blk_drain(BlockBackend *blk)
26ba25
     }
26ba25
 
26ba25
     /* We may have -ENOMEDIUM completions in flight */
26ba25
-    AIO_WAIT_WHILE(&blk->wait,
26ba25
-            blk_get_aio_context(blk),
26ba25
-            atomic_mb_read(&blk->in_flight) > 0);
26ba25
+    AIO_WAIT_WHILE(blk_get_aio_context(blk),
26ba25
+                   atomic_mb_read(&blk->in_flight) > 0);
26ba25
 
26ba25
     if (bs) {
26ba25
         bdrv_drained_end(bs);
26ba25
@@ -1622,8 +1620,7 @@ void blk_drain_all(void)
26ba25
         aio_context_acquire(ctx);
26ba25
 
26ba25
         /* We may have -ENOMEDIUM completions in flight */
26ba25
-        AIO_WAIT_WHILE(&blk->wait, ctx,
26ba25
-                atomic_mb_read(&blk->in_flight) > 0);
26ba25
+        AIO_WAIT_WHILE(ctx, atomic_mb_read(&blk->in_flight) > 0);
26ba25
 
26ba25
         aio_context_release(ctx);
26ba25
     }
26ba25
diff --git a/block/io.c b/block/io.c
26ba25
index 3313958..7a99f7b 100644
26ba25
--- a/block/io.c
26ba25
+++ b/block/io.c
26ba25
@@ -38,8 +38,6 @@
26ba25
 /* Maximum bounce buffer for copy-on-read and write zeroes, in bytes */
26ba25
 #define MAX_BOUNCE_BUFFER (32768 << BDRV_SECTOR_BITS)
26ba25
 
26ba25
-static AioWait drain_all_aio_wait;
26ba25
-
26ba25
 static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs,
26ba25
     int64_t offset, int bytes, BdrvRequestFlags flags);
26ba25
 
26ba25
@@ -555,7 +553,7 @@ void bdrv_drain_all_begin(void)
26ba25
     }
26ba25
 
26ba25
     /* Now poll the in-flight requests */
26ba25
-    AIO_WAIT_WHILE(&drain_all_aio_wait, NULL, bdrv_drain_all_poll());
26ba25
+    AIO_WAIT_WHILE(NULL, bdrv_drain_all_poll());
26ba25
 
26ba25
     while ((bs = bdrv_next_all_states(bs))) {
26ba25
         bdrv_drain_assert_idle(bs);
26ba25
@@ -709,8 +707,7 @@ void bdrv_inc_in_flight(BlockDriverState *bs)
26ba25
 
26ba25
 void bdrv_wakeup(BlockDriverState *bs)
26ba25
 {
26ba25
-    aio_wait_kick(bdrv_get_aio_wait(bs));
26ba25
-    aio_wait_kick(&drain_all_aio_wait);
26ba25
+    aio_wait_kick();
26ba25
 }
26ba25
 
26ba25
 void bdrv_dec_in_flight(BlockDriverState *bs)
26ba25
diff --git a/blockjob.c b/blockjob.c
26ba25
index 617d86f..06f2429 100644
26ba25
--- a/blockjob.c
26ba25
+++ b/blockjob.c
26ba25
@@ -221,20 +221,9 @@ int block_job_add_bdrv(BlockJob *job, const char *name, BlockDriverState *bs,
26ba25
     return 0;
26ba25
 }
26ba25
 
26ba25
-void block_job_wakeup_all_bdrv(BlockJob *job)
26ba25
-{
26ba25
-    GSList *l;
26ba25
-
26ba25
-    for (l = job->nodes; l; l = l->next) {
26ba25
-        BdrvChild *c = l->data;
26ba25
-        bdrv_wakeup(c->bs);
26ba25
-    }
26ba25
-}
26ba25
-
26ba25
 static void block_job_on_idle(Notifier *n, void *opaque)
26ba25
 {
26ba25
-    BlockJob *job = opaque;
26ba25
-    block_job_wakeup_all_bdrv(job);
26ba25
+    aio_wait_kick();
26ba25
 }
26ba25
 
26ba25
 bool block_job_is_internal(BlockJob *job)
26ba25
diff --git a/include/block/aio-wait.h b/include/block/aio-wait.h
26ba25
index 600fad1..afd0ff7 100644
26ba25
--- a/include/block/aio-wait.h
26ba25
+++ b/include/block/aio-wait.h
26ba25
@@ -30,14 +30,15 @@
26ba25
 /**
26ba25
  * AioWait:
26ba25
  *
26ba25
- * An object that facilitates synchronous waiting on a condition.  The main
26ba25
- * loop can wait on an operation running in an IOThread as follows:
26ba25
+ * An object that facilitates synchronous waiting on a condition. A single
26ba25
+ * global AioWait object (global_aio_wait) is used internally.
26ba25
+ *
26ba25
+ * The main loop can wait on an operation running in an IOThread as follows:
26ba25
  *
26ba25
- *   AioWait *wait = ...;
26ba25
  *   AioContext *ctx = ...;
26ba25
  *   MyWork work = { .done = false };
26ba25
  *   schedule_my_work_in_iothread(ctx, &work);
26ba25
- *   AIO_WAIT_WHILE(wait, ctx, !work.done);
26ba25
+ *   AIO_WAIT_WHILE(ctx, !work.done);
26ba25
  *
26ba25
  * The IOThread must call aio_wait_kick() to notify the main loop when
26ba25
  * work.done changes:
26ba25
@@ -46,7 +47,7 @@
26ba25
  *   {
26ba25
  *       ...
26ba25
  *       work.done = true;
26ba25
- *       aio_wait_kick(wait);
26ba25
+ *       aio_wait_kick();
26ba25
  *   }
26ba25
  */
26ba25
 typedef struct {
26ba25
@@ -54,9 +55,10 @@ typedef struct {
26ba25
     unsigned num_waiters;
26ba25
 } AioWait;
26ba25
 
26ba25
+extern AioWait global_aio_wait;
26ba25
+
26ba25
 /**
26ba25
  * AIO_WAIT_WHILE:
26ba25
- * @wait: the aio wait object
26ba25
  * @ctx: the aio context, or NULL if multiple aio contexts (for which the
26ba25
  *       caller does not hold a lock) are involved in the polling condition.
26ba25
  * @cond: wait while this conditional expression is true
26ba25
@@ -72,9 +74,9 @@ typedef struct {
26ba25
  * wait on conditions between two IOThreads since that could lead to deadlock,
26ba25
  * go via the main loop instead.
26ba25
  */
26ba25
-#define AIO_WAIT_WHILE(wait, ctx, cond) ({                         \
26ba25
+#define AIO_WAIT_WHILE(ctx, cond) ({                               \
26ba25
     bool waited_ = false;                                          \
26ba25
-    AioWait *wait_ = (wait);                                       \
26ba25
+    AioWait *wait_ = &global_aio_wait;                             \
26ba25
     AioContext *ctx_ = (ctx);                                      \
26ba25
     /* Increment wait_->num_waiters before evaluating cond. */     \
26ba25
     atomic_inc(&wait_->num_waiters);                               \
26ba25
@@ -102,14 +104,12 @@ typedef struct {
26ba25
 
26ba25
 /**
26ba25
  * aio_wait_kick:
26ba25
- * @wait: the aio wait object that should re-evaluate its condition
26ba25
- *
26ba25
  * Wake up the main thread if it is waiting on AIO_WAIT_WHILE().  During
26ba25
  * synchronous operations performed in an IOThread, the main thread lets the
26ba25
  * IOThread's event loop run, waiting for the operation to complete.  A
26ba25
  * aio_wait_kick() call will wake up the main thread.
26ba25
  */
26ba25
-void aio_wait_kick(AioWait *wait);
26ba25
+void aio_wait_kick(void);
26ba25
 
26ba25
 /**
26ba25
  * aio_wait_bh_oneshot:
26ba25
diff --git a/include/block/block.h b/include/block/block.h
26ba25
index 356712c..8e78daf 100644
26ba25
--- a/include/block/block.h
26ba25
+++ b/include/block/block.h
26ba25
@@ -406,13 +406,9 @@ void bdrv_drain_all_begin(void);
26ba25
 void bdrv_drain_all_end(void);
26ba25
 void bdrv_drain_all(void);
26ba25
 
26ba25
-/* Returns NULL when bs == NULL */
26ba25
-AioWait *bdrv_get_aio_wait(BlockDriverState *bs);
26ba25
-
26ba25
 #define BDRV_POLL_WHILE(bs, cond) ({                       \
26ba25
     BlockDriverState *bs_ = (bs);                          \
26ba25
-    AIO_WAIT_WHILE(bdrv_get_aio_wait(bs_),                 \
26ba25
-                   bdrv_get_aio_context(bs_),              \
26ba25
+    AIO_WAIT_WHILE(bdrv_get_aio_context(bs_),              \
26ba25
                    cond); })
26ba25
 
26ba25
 int bdrv_pdiscard(BlockDriverState *bs, int64_t offset, int bytes);
26ba25
diff --git a/include/block/block_int.h b/include/block/block_int.h
26ba25
index b7806e3..ff923b7 100644
26ba25
--- a/include/block/block_int.h
26ba25
+++ b/include/block/block_int.h
26ba25
@@ -782,9 +782,6 @@ struct BlockDriverState {
26ba25
     unsigned int in_flight;
26ba25
     unsigned int serialising_in_flight;
26ba25
 
26ba25
-    /* Kicked to signal main loop when a request completes. */
26ba25
-    AioWait wait;
26ba25
-
26ba25
     /* counter for nested bdrv_io_plug.
26ba25
      * Accessed with atomic ops.
26ba25
     */
26ba25
diff --git a/include/block/blockjob.h b/include/block/blockjob.h
26ba25
index 2290bbb..ede0bd8 100644
26ba25
--- a/include/block/blockjob.h
26ba25
+++ b/include/block/blockjob.h
26ba25
@@ -122,16 +122,6 @@ int block_job_add_bdrv(BlockJob *job, const char *name, BlockDriverState *bs,
26ba25
 void block_job_remove_all_bdrv(BlockJob *job);
26ba25
 
26ba25
 /**
26ba25
- * block_job_wakeup_all_bdrv:
26ba25
- * @job: The block job
26ba25
- *
26ba25
- * Calls bdrv_wakeup() for all BlockDriverStates that have been added to the
26ba25
- * job. This function is to be called whenever child_job_drained_poll() would
26ba25
- * go from true to false to notify waiting drain requests.
26ba25
- */
26ba25
-void block_job_wakeup_all_bdrv(BlockJob *job);
26ba25
-
26ba25
-/**
26ba25
  * block_job_set_speed:
26ba25
  * @job: The job to set the speed for.
26ba25
  * @speed: The new value
26ba25
diff --git a/job.c b/job.c
26ba25
index 5b53e43..3a7db59 100644
26ba25
--- a/job.c
26ba25
+++ b/job.c
26ba25
@@ -973,7 +973,6 @@ void job_complete(Job *job, Error **errp)
26ba25
 int job_finish_sync(Job *job, void (*finish)(Job *, Error **errp), Error **errp)
26ba25
 {
26ba25
     Error *local_err = NULL;
26ba25
-    AioWait dummy_wait = {};
26ba25
     int ret;
26ba25
 
26ba25
     job_ref(job);
26ba25
@@ -987,7 +986,7 @@ int job_finish_sync(Job *job, void (*finish)(Job *, Error **errp), Error **errp)
26ba25
         return -EBUSY;
26ba25
     }
26ba25
 
26ba25
-    AIO_WAIT_WHILE(&dummy_wait, job->aio_context,
26ba25
+    AIO_WAIT_WHILE(job->aio_context,
26ba25
                    (job_drain(job), !job_is_completed(job)));
26ba25
 
26ba25
     ret = (job_is_cancelled(job) && job->ret == 0) ? -ECANCELED : job->ret;
26ba25
diff --git a/util/aio-wait.c b/util/aio-wait.c
26ba25
index b8a8f86..b487749 100644
26ba25
--- a/util/aio-wait.c
26ba25
+++ b/util/aio-wait.c
26ba25
@@ -26,21 +26,22 @@
26ba25
 #include "qemu/main-loop.h"
26ba25
 #include "block/aio-wait.h"
26ba25
 
26ba25
+AioWait global_aio_wait;
26ba25
+
26ba25
 static void dummy_bh_cb(void *opaque)
26ba25
 {
26ba25
     /* The point is to make AIO_WAIT_WHILE()'s aio_poll() return */
26ba25
 }
26ba25
 
26ba25
-void aio_wait_kick(AioWait *wait)
26ba25
+void aio_wait_kick(void)
26ba25
 {
26ba25
     /* The barrier (or an atomic op) is in the caller.  */
26ba25
-    if (atomic_read(&wait->num_waiters)) {
26ba25
+    if (atomic_read(&global_aio_wait.num_waiters)) {
26ba25
         aio_bh_schedule_oneshot(qemu_get_aio_context(), dummy_bh_cb, NULL);
26ba25
     }
26ba25
 }
26ba25
 
26ba25
 typedef struct {
26ba25
-    AioWait wait;
26ba25
     bool done;
26ba25
     QEMUBHFunc *cb;
26ba25
     void *opaque;
26ba25
@@ -54,7 +55,7 @@ static void aio_wait_bh(void *opaque)
26ba25
     data->cb(data->opaque);
26ba25
 
26ba25
     data->done = true;
26ba25
-    aio_wait_kick(&data->wait);
26ba25
+    aio_wait_kick();
26ba25
 }
26ba25
 
26ba25
 void aio_wait_bh_oneshot(AioContext *ctx, QEMUBHFunc *cb, void *opaque)
26ba25
@@ -67,5 +68,5 @@ void aio_wait_bh_oneshot(AioContext *ctx, QEMUBHFunc *cb, void *opaque)
26ba25
     assert(qemu_get_current_aio_context() == qemu_get_aio_context());
26ba25
 
26ba25
     aio_bh_schedule_oneshot(ctx, aio_wait_bh, &data);
26ba25
-    AIO_WAIT_WHILE(&data.wait, ctx, !data.done);
26ba25
+    AIO_WAIT_WHILE(ctx, !data.done);
26ba25
 }
26ba25
-- 
26ba25
1.8.3.1
26ba25