|
|
ae23c9 |
From 7946b28c7e68c564b0734869ff6f4e36bd5e2e2d Mon Sep 17 00:00:00 2001
|
|
|
ae23c9 |
From: Kevin Wolf <kwolf@redhat.com>
|
|
|
ae23c9 |
Date: Wed, 10 Oct 2018 20:21:48 +0100
|
|
|
ae23c9 |
Subject: [PATCH 22/49] block: Allow graph changes in bdrv_drain_all_begin/end
|
|
|
ae23c9 |
sections
|
|
|
ae23c9 |
|
|
|
ae23c9 |
RH-Author: Kevin Wolf <kwolf@redhat.com>
|
|
|
ae23c9 |
Message-id: <20181010202213.7372-10-kwolf@redhat.com>
|
|
|
ae23c9 |
Patchwork-id: 82597
|
|
|
ae23c9 |
O-Subject: [RHEL-8 qemu-kvm PATCH 19/44] block: Allow graph changes in bdrv_drain_all_begin/end sections
|
|
|
ae23c9 |
Bugzilla: 1637976
|
|
|
ae23c9 |
RH-Acked-by: Max Reitz <mreitz@redhat.com>
|
|
|
ae23c9 |
RH-Acked-by: John Snow <jsnow@redhat.com>
|
|
|
ae23c9 |
RH-Acked-by: Thomas Huth <thuth@redhat.com>
|
|
|
ae23c9 |
|
|
|
ae23c9 |
bdrv_drain_all_*() used bdrv_next() to iterate over all root nodes and
|
|
|
ae23c9 |
did a subtree drain for each of them. This works fine as long as the
|
|
|
ae23c9 |
graph is static, but sadly, reality looks different.
|
|
|
ae23c9 |
|
|
|
ae23c9 |
If the graph changes so that root nodes are added or removed, we would
|
|
|
ae23c9 |
have to compensate for this. bdrv_next() returns each root node only
|
|
|
ae23c9 |
once even if it's the root node for multiple BlockBackends or for a
|
|
|
ae23c9 |
monitor-owned block driver tree, which would only complicate things.
|
|
|
ae23c9 |
|
|
|
ae23c9 |
The much easier and more obviously correct way is to fundamentally
|
|
|
ae23c9 |
change the way the functions work: Iterate over all BlockDriverStates,
|
|
|
ae23c9 |
no matter who owns them, and drain them individually. Compensation is
|
|
|
ae23c9 |
only necessary when a new BDS is created inside a drain_all section.
|
|
|
ae23c9 |
Removal of a BDS doesn't require any action because it's gone afterwards
|
|
|
ae23c9 |
anyway.
|
|
|
ae23c9 |
|
|
|
ae23c9 |
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
|
|
ae23c9 |
(cherry picked from commit 0f12264e7a41458179ad10276a7c33c72024861a)
|
|
|
ae23c9 |
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
|
|
ae23c9 |
Signed-off-by: Danilo C. L. de Paula <ddepaula@redhat.com>
|
|
|
ae23c9 |
---
|
|
|
ae23c9 |
block.c | 34 ++++++++++++++++++++++++---
|
|
|
ae23c9 |
block/io.c | 60 ++++++++++++++++++++++++++++++++++++-----------
|
|
|
ae23c9 |
include/block/block.h | 1 +
|
|
|
ae23c9 |
include/block/block_int.h | 1 +
|
|
|
ae23c9 |
4 files changed, 79 insertions(+), 17 deletions(-)
|
|
|
ae23c9 |
|
|
|
ae23c9 |
diff --git a/block.c b/block.c
|
|
|
ae23c9 |
index 519be6e..eea9c6f 100644
|
|
|
ae23c9 |
--- a/block.c
|
|
|
ae23c9 |
+++ b/block.c
|
|
|
ae23c9 |
@@ -333,6 +333,10 @@ BlockDriverState *bdrv_new(void)
|
|
|
ae23c9 |
|
|
|
ae23c9 |
qemu_co_queue_init(&bs->flush_queue);
|
|
|
ae23c9 |
|
|
|
ae23c9 |
+ for (i = 0; i < bdrv_drain_all_count; i++) {
|
|
|
ae23c9 |
+ bdrv_drained_begin(bs);
|
|
|
ae23c9 |
+ }
|
|
|
ae23c9 |
+
|
|
|
ae23c9 |
QTAILQ_INSERT_TAIL(&all_bdrv_states, bs, bs_list);
|
|
|
ae23c9 |
|
|
|
ae23c9 |
return bs;
|
|
|
ae23c9 |
@@ -1170,7 +1174,7 @@ static int bdrv_open_driver(BlockDriverState *bs, BlockDriver *drv,
|
|
|
ae23c9 |
int open_flags, Error **errp)
|
|
|
ae23c9 |
{
|
|
|
ae23c9 |
Error *local_err = NULL;
|
|
|
ae23c9 |
- int ret;
|
|
|
ae23c9 |
+ int i, ret;
|
|
|
ae23c9 |
|
|
|
ae23c9 |
bdrv_assign_node_name(bs, node_name, &local_err);
|
|
|
ae23c9 |
if (local_err) {
|
|
|
ae23c9 |
@@ -1218,6 +1222,12 @@ static int bdrv_open_driver(BlockDriverState *bs, BlockDriver *drv,
|
|
|
ae23c9 |
assert(bdrv_min_mem_align(bs) != 0);
|
|
|
ae23c9 |
assert(is_power_of_2(bs->bl.request_alignment));
|
|
|
ae23c9 |
|
|
|
ae23c9 |
+ for (i = 0; i < bs->quiesce_counter; i++) {
|
|
|
ae23c9 |
+ if (drv->bdrv_co_drain_begin) {
|
|
|
ae23c9 |
+ drv->bdrv_co_drain_begin(bs);
|
|
|
ae23c9 |
+ }
|
|
|
ae23c9 |
+ }
|
|
|
ae23c9 |
+
|
|
|
ae23c9 |
return 0;
|
|
|
ae23c9 |
open_failed:
|
|
|
ae23c9 |
bs->drv = NULL;
|
|
|
ae23c9 |
@@ -2039,7 +2049,12 @@ static void bdrv_replace_child_noperm(BdrvChild *child,
|
|
|
ae23c9 |
child->role->detach(child);
|
|
|
ae23c9 |
}
|
|
|
ae23c9 |
if (old_bs->quiesce_counter && child->role->drained_end) {
|
|
|
ae23c9 |
- for (i = 0; i < old_bs->quiesce_counter; i++) {
|
|
|
ae23c9 |
+ int num = old_bs->quiesce_counter;
|
|
|
ae23c9 |
+ if (child->role->parent_is_bds) {
|
|
|
ae23c9 |
+ num -= bdrv_drain_all_count;
|
|
|
ae23c9 |
+ }
|
|
|
ae23c9 |
+ assert(num >= 0);
|
|
|
ae23c9 |
+ for (i = 0; i < num; i++) {
|
|
|
ae23c9 |
child->role->drained_end(child);
|
|
|
ae23c9 |
}
|
|
|
ae23c9 |
}
|
|
|
ae23c9 |
@@ -2051,7 +2066,12 @@ static void bdrv_replace_child_noperm(BdrvChild *child,
|
|
|
ae23c9 |
if (new_bs) {
|
|
|
ae23c9 |
QLIST_INSERT_HEAD(&new_bs->parents, child, next_parent);
|
|
|
ae23c9 |
if (new_bs->quiesce_counter && child->role->drained_begin) {
|
|
|
ae23c9 |
- for (i = 0; i < new_bs->quiesce_counter; i++) {
|
|
|
ae23c9 |
+ int num = new_bs->quiesce_counter;
|
|
|
ae23c9 |
+ if (child->role->parent_is_bds) {
|
|
|
ae23c9 |
+ num -= bdrv_drain_all_count;
|
|
|
ae23c9 |
+ }
|
|
|
ae23c9 |
+ assert(num >= 0);
|
|
|
ae23c9 |
+ for (i = 0; i < num; i++) {
|
|
|
ae23c9 |
child->role->drained_begin(child);
|
|
|
ae23c9 |
}
|
|
|
ae23c9 |
}
|
|
|
ae23c9 |
@@ -3993,6 +4013,14 @@ BlockDriverState *bdrv_next_node(BlockDriverState *bs)
|
|
|
ae23c9 |
return QTAILQ_NEXT(bs, node_list);
|
|
|
ae23c9 |
}
|
|
|
ae23c9 |
|
|
|
ae23c9 |
+BlockDriverState *bdrv_next_all_states(BlockDriverState *bs)
|
|
|
ae23c9 |
+{
|
|
|
ae23c9 |
+ if (!bs) {
|
|
|
ae23c9 |
+ return QTAILQ_FIRST(&all_bdrv_states);
|
|
|
ae23c9 |
+ }
|
|
|
ae23c9 |
+ return QTAILQ_NEXT(bs, bs_list);
|
|
|
ae23c9 |
+}
|
|
|
ae23c9 |
+
|
|
|
ae23c9 |
const char *bdrv_get_node_name(const BlockDriverState *bs)
|
|
|
ae23c9 |
{
|
|
|
ae23c9 |
return bs->node_name;
|
|
|
ae23c9 |
diff --git a/block/io.c b/block/io.c
|
|
|
ae23c9 |
index 0021fefd..38ae299 100644
|
|
|
ae23c9 |
--- a/block/io.c
|
|
|
ae23c9 |
+++ b/block/io.c
|
|
|
ae23c9 |
@@ -38,6 +38,8 @@
|
|
|
ae23c9 |
/* Maximum bounce buffer for copy-on-read and write zeroes, in bytes */
|
|
|
ae23c9 |
#define MAX_BOUNCE_BUFFER (32768 << BDRV_SECTOR_BITS)
|
|
|
ae23c9 |
|
|
|
ae23c9 |
+static AioWait drain_all_aio_wait;
|
|
|
ae23c9 |
+
|
|
|
ae23c9 |
static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs,
|
|
|
ae23c9 |
int64_t offset, int bytes, BdrvRequestFlags flags);
|
|
|
ae23c9 |
|
|
|
ae23c9 |
@@ -471,6 +473,29 @@ static void bdrv_drain_assert_idle(BlockDriverState *bs)
|
|
|
ae23c9 |
}
|
|
|
ae23c9 |
}
|
|
|
ae23c9 |
|
|
|
ae23c9 |
+unsigned int bdrv_drain_all_count = 0;
|
|
|
ae23c9 |
+
|
|
|
ae23c9 |
+static bool bdrv_drain_all_poll(void)
|
|
|
ae23c9 |
+{
|
|
|
ae23c9 |
+ BlockDriverState *bs = NULL;
|
|
|
ae23c9 |
+ bool result = false;
|
|
|
ae23c9 |
+
|
|
|
ae23c9 |
+ /* Execute pending BHs first (may modify the graph) and check everything
|
|
|
ae23c9 |
+ * else only after the BHs have executed. */
|
|
|
ae23c9 |
+ while (aio_poll(qemu_get_aio_context(), false));
|
|
|
ae23c9 |
+
|
|
|
ae23c9 |
+ /* bdrv_drain_poll() can't make changes to the graph and we are holding the
|
|
|
ae23c9 |
+ * main AioContext lock, so iterating bdrv_next_all_states() is safe. */
|
|
|
ae23c9 |
+ while ((bs = bdrv_next_all_states(bs))) {
|
|
|
ae23c9 |
+ AioContext *aio_context = bdrv_get_aio_context(bs);
|
|
|
ae23c9 |
+ aio_context_acquire(aio_context);
|
|
|
ae23c9 |
+ result |= bdrv_drain_poll(bs, false, NULL, true);
|
|
|
ae23c9 |
+ aio_context_release(aio_context);
|
|
|
ae23c9 |
+ }
|
|
|
ae23c9 |
+
|
|
|
ae23c9 |
+ return result;
|
|
|
ae23c9 |
+}
|
|
|
ae23c9 |
+
|
|
|
ae23c9 |
/*
|
|
|
ae23c9 |
* Wait for pending requests to complete across all BlockDriverStates
|
|
|
ae23c9 |
*
|
|
|
ae23c9 |
@@ -485,45 +510,51 @@ static void bdrv_drain_assert_idle(BlockDriverState *bs)
|
|
|
ae23c9 |
*/
|
|
|
ae23c9 |
void bdrv_drain_all_begin(void)
|
|
|
ae23c9 |
{
|
|
|
ae23c9 |
- BlockDriverState *bs;
|
|
|
ae23c9 |
- BdrvNextIterator it;
|
|
|
ae23c9 |
+ BlockDriverState *bs = NULL;
|
|
|
ae23c9 |
|
|
|
ae23c9 |
if (qemu_in_coroutine()) {
|
|
|
ae23c9 |
- bdrv_co_yield_to_drain(NULL, true, false, NULL, false, true);
|
|
|
ae23c9 |
+ bdrv_co_yield_to_drain(NULL, true, false, NULL, true, true);
|
|
|
ae23c9 |
return;
|
|
|
ae23c9 |
}
|
|
|
ae23c9 |
|
|
|
ae23c9 |
- /* BDRV_POLL_WHILE() for a node can only be called from its own I/O thread
|
|
|
ae23c9 |
- * or the main loop AioContext. We potentially use BDRV_POLL_WHILE() on
|
|
|
ae23c9 |
- * nodes in several different AioContexts, so make sure we're in the main
|
|
|
ae23c9 |
- * context. */
|
|
|
ae23c9 |
+ /* AIO_WAIT_WHILE() with a NULL context can only be called from the main
|
|
|
ae23c9 |
+ * loop AioContext, so make sure we're in the main context. */
|
|
|
ae23c9 |
assert(qemu_get_current_aio_context() == qemu_get_aio_context());
|
|
|
ae23c9 |
+ assert(bdrv_drain_all_count < INT_MAX);
|
|
|
ae23c9 |
+ bdrv_drain_all_count++;
|
|
|
ae23c9 |
|
|
|
ae23c9 |
- for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
|
|
|
ae23c9 |
+ /* Quiesce all nodes, without polling in-flight requests yet. The graph
|
|
|
ae23c9 |
+ * cannot change during this loop. */
|
|
|
ae23c9 |
+ while ((bs = bdrv_next_all_states(bs))) {
|
|
|
ae23c9 |
AioContext *aio_context = bdrv_get_aio_context(bs);
|
|
|
ae23c9 |
|
|
|
ae23c9 |
aio_context_acquire(aio_context);
|
|
|
ae23c9 |
- bdrv_do_drained_begin(bs, true, NULL, false, true);
|
|
|
ae23c9 |
+ bdrv_do_drained_begin(bs, false, NULL, true, false);
|
|
|
ae23c9 |
aio_context_release(aio_context);
|
|
|
ae23c9 |
}
|
|
|
ae23c9 |
|
|
|
ae23c9 |
- for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
|
|
|
ae23c9 |
+ /* Now poll the in-flight requests */
|
|
|
ae23c9 |
+ AIO_WAIT_WHILE(&drain_all_aio_wait, NULL, bdrv_drain_all_poll());
|
|
|
ae23c9 |
+
|
|
|
ae23c9 |
+ while ((bs = bdrv_next_all_states(bs))) {
|
|
|
ae23c9 |
bdrv_drain_assert_idle(bs);
|
|
|
ae23c9 |
}
|
|
|
ae23c9 |
}
|
|
|
ae23c9 |
|
|
|
ae23c9 |
void bdrv_drain_all_end(void)
|
|
|
ae23c9 |
{
|
|
|
ae23c9 |
- BlockDriverState *bs;
|
|
|
ae23c9 |
- BdrvNextIterator it;
|
|
|
ae23c9 |
+ BlockDriverState *bs = NULL;
|
|
|
ae23c9 |
|
|
|
ae23c9 |
- for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
|
|
|
ae23c9 |
+ while ((bs = bdrv_next_all_states(bs))) {
|
|
|
ae23c9 |
AioContext *aio_context = bdrv_get_aio_context(bs);
|
|
|
ae23c9 |
|
|
|
ae23c9 |
aio_context_acquire(aio_context);
|
|
|
ae23c9 |
- bdrv_do_drained_end(bs, true, NULL, false);
|
|
|
ae23c9 |
+ bdrv_do_drained_end(bs, false, NULL, true);
|
|
|
ae23c9 |
aio_context_release(aio_context);
|
|
|
ae23c9 |
}
|
|
|
ae23c9 |
+
|
|
|
ae23c9 |
+ assert(bdrv_drain_all_count > 0);
|
|
|
ae23c9 |
+ bdrv_drain_all_count--;
|
|
|
ae23c9 |
}
|
|
|
ae23c9 |
|
|
|
ae23c9 |
void bdrv_drain_all(void)
|
|
|
ae23c9 |
@@ -658,6 +689,7 @@ void bdrv_inc_in_flight(BlockDriverState *bs)
|
|
|
ae23c9 |
void bdrv_wakeup(BlockDriverState *bs)
|
|
|
ae23c9 |
{
|
|
|
ae23c9 |
aio_wait_kick(bdrv_get_aio_wait(bs));
|
|
|
ae23c9 |
+ aio_wait_kick(&drain_all_aio_wait);
|
|
|
ae23c9 |
}
|
|
|
ae23c9 |
|
|
|
ae23c9 |
void bdrv_dec_in_flight(BlockDriverState *bs)
|
|
|
ae23c9 |
diff --git a/include/block/block.h b/include/block/block.h
|
|
|
ae23c9 |
index 6e91803..f9079ac 100644
|
|
|
ae23c9 |
--- a/include/block/block.h
|
|
|
ae23c9 |
+++ b/include/block/block.h
|
|
|
ae23c9 |
@@ -449,6 +449,7 @@ BlockDriverState *bdrv_lookup_bs(const char *device,
|
|
|
ae23c9 |
Error **errp);
|
|
|
ae23c9 |
bool bdrv_chain_contains(BlockDriverState *top, BlockDriverState *base);
|
|
|
ae23c9 |
BlockDriverState *bdrv_next_node(BlockDriverState *bs);
|
|
|
ae23c9 |
+BlockDriverState *bdrv_next_all_states(BlockDriverState *bs);
|
|
|
ae23c9 |
|
|
|
ae23c9 |
typedef struct BdrvNextIterator {
|
|
|
ae23c9 |
enum {
|
|
|
ae23c9 |
diff --git a/include/block/block_int.h b/include/block/block_int.h
|
|
|
ae23c9 |
index 0ad8a76..9757d5e 100644
|
|
|
ae23c9 |
--- a/include/block/block_int.h
|
|
|
ae23c9 |
+++ b/include/block/block_int.h
|
|
|
ae23c9 |
@@ -845,6 +845,7 @@ int coroutine_fn bdrv_co_pwritev(BdrvChild *child,
|
|
|
ae23c9 |
int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
|
|
|
ae23c9 |
BdrvRequestFlags flags);
|
|
|
ae23c9 |
|
|
|
ae23c9 |
+extern unsigned int bdrv_drain_all_count;
|
|
|
ae23c9 |
void bdrv_apply_subtree_drain(BdrvChild *child, BlockDriverState *new_parent);
|
|
|
ae23c9 |
void bdrv_unapply_subtree_drain(BdrvChild *child, BlockDriverState *old_parent);
|
|
|
ae23c9 |
|
|
|
ae23c9 |
--
|
|
|
ae23c9 |
1.8.3.1
|
|
|
ae23c9 |
|