yeahuh / rpms / qemu-kvm

Forked from rpms/qemu-kvm 2 years ago
Clone

Blame SOURCES/kvm-nbd-server-Use-drained-block-ops-to-quiesce-the-serv.patch

a83cc2
From 9182af6a819e60a079349fd6d8b28a28adea90b1 Mon Sep 17 00:00:00 2001
a83cc2
From: Sergio Lopez Pascual <slp@redhat.com>
a83cc2
Date: Thu, 17 Jun 2021 09:13:21 -0400
a83cc2
Subject: [PATCH 06/12] nbd/server: Use drained block ops to quiesce the server
a83cc2
MIME-Version: 1.0
a83cc2
Content-Type: text/plain; charset=UTF-8
a83cc2
Content-Transfer-Encoding: 8bit
a83cc2
a83cc2
RH-Author: Miroslav Rezanina <mrezanin@redhat.com>
a83cc2
RH-MergeRequest: 16: Synchronize with RHEL-AV 8.5 release 21 to RHEL 9
a83cc2
RH-Commit: [4/8] ca32c99563254a8a31104948e41fa691453d0399 (mrezanin/centos-src-qemu-kvm)
a83cc2
RH-Bugzilla: 1957194
a83cc2
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
a83cc2
RH-Acked-by: Daniel P. Berrangé <berrange@redhat.com>
a83cc2
a83cc2
Before switching between AioContexts we need to make sure that we're
a83cc2
fully quiesced ("nb_requests == 0" for every client) when entering the
a83cc2
drained section.
a83cc2
a83cc2
To do this, we set "quiescing = true" for every client on
a83cc2
".drained_begin" to prevent new coroutines from being created, and
a83cc2
check if "nb_requests == 0" on ".drained_poll". Finally, once we're
a83cc2
exiting the drained section, on ".drained_end" we set "quiescing =
a83cc2
false" and call "nbd_client_receive_next_request()" to resume the
a83cc2
processing of new requests.
a83cc2
a83cc2
With these changes, "blk_aio_attached()" and "blk_aio_detach()" can be
a83cc2
reverted to be as simple as they were before f148ae7d36.
a83cc2
a83cc2
RHBZ: https://bugzilla.redhat.com/show_bug.cgi?id=1960137
a83cc2
Suggested-by: Kevin Wolf <kwolf@redhat.com>
a83cc2
Signed-off-by: Sergio Lopez <slp@redhat.com>
a83cc2
Message-Id: <20210602060552.17433-3-slp@redhat.com>
a83cc2
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
a83cc2
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
a83cc2
(cherry picked from commit fd6afc501a019682d1b8468b562355a2887087bd)
a83cc2
Signed-off-by: Sergio Lopez <slp@redhat.com>
a83cc2
Signed-off-by: Danilo C. L. de Paula <ddepaula@redhat.com>
a83cc2
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
a83cc2
---
a83cc2
 nbd/server.c | 82 ++++++++++++++++++++++++++++++++++++++--------------
a83cc2
 1 file changed, 61 insertions(+), 21 deletions(-)
a83cc2
a83cc2
diff --git a/nbd/server.c b/nbd/server.c
a83cc2
index 86a44a9b41..b60ebc3ab6 100644
a83cc2
--- a/nbd/server.c
a83cc2
+++ b/nbd/server.c
a83cc2
@@ -1513,6 +1513,11 @@ static void nbd_request_put(NBDRequestData *req)
a83cc2
     g_free(req);
a83cc2
 
a83cc2
     client->nb_requests--;
a83cc2
+
a83cc2
+    if (client->quiescing && client->nb_requests == 0) {
a83cc2
+        aio_wait_kick();
a83cc2
+    }
a83cc2
+
a83cc2
     nbd_client_receive_next_request(client);
a83cc2
 
a83cc2
     nbd_client_put(client);
a83cc2
@@ -1530,49 +1535,68 @@ static void blk_aio_attached(AioContext *ctx, void *opaque)
a83cc2
     QTAILQ_FOREACH(client, &exp->clients, next) {
a83cc2
         qio_channel_attach_aio_context(client->ioc, ctx);
a83cc2
 
a83cc2
+        assert(client->nb_requests == 0);
a83cc2
         assert(client->recv_coroutine == NULL);
a83cc2
         assert(client->send_coroutine == NULL);
a83cc2
-
a83cc2
-        if (client->quiescing) {
a83cc2
-            client->quiescing = false;
a83cc2
-            nbd_client_receive_next_request(client);
a83cc2
-        }
a83cc2
     }
a83cc2
 }
a83cc2
 
a83cc2
-static void nbd_aio_detach_bh(void *opaque)
a83cc2
+static void blk_aio_detach(void *opaque)
a83cc2
 {
a83cc2
     NBDExport *exp = opaque;
a83cc2
     NBDClient *client;
a83cc2
 
a83cc2
+    trace_nbd_blk_aio_detach(exp->name, exp->common.ctx);
a83cc2
+
a83cc2
     QTAILQ_FOREACH(client, &exp->clients, next) {
a83cc2
         qio_channel_detach_aio_context(client->ioc);
a83cc2
+    }
a83cc2
+
a83cc2
+    exp->common.ctx = NULL;
a83cc2
+}
a83cc2
+
a83cc2
+static void nbd_drained_begin(void *opaque)
a83cc2
+{
a83cc2
+    NBDExport *exp = opaque;
a83cc2
+    NBDClient *client;
a83cc2
+
a83cc2
+    QTAILQ_FOREACH(client, &exp->clients, next) {
a83cc2
         client->quiescing = true;
a83cc2
+    }
a83cc2
+}
a83cc2
 
a83cc2
-        if (client->recv_coroutine) {
a83cc2
-            if (client->read_yielding) {
a83cc2
-                qemu_aio_coroutine_enter(exp->common.ctx,
a83cc2
-                                         client->recv_coroutine);
a83cc2
-            } else {
a83cc2
-                AIO_WAIT_WHILE(exp->common.ctx, client->recv_coroutine != NULL);
a83cc2
-            }
a83cc2
-        }
a83cc2
+static void nbd_drained_end(void *opaque)
a83cc2
+{
a83cc2
+    NBDExport *exp = opaque;
a83cc2
+    NBDClient *client;
a83cc2
 
a83cc2
-        if (client->send_coroutine) {
a83cc2
-            AIO_WAIT_WHILE(exp->common.ctx, client->send_coroutine != NULL);
a83cc2
-        }
a83cc2
+    QTAILQ_FOREACH(client, &exp->clients, next) {
a83cc2
+        client->quiescing = false;
a83cc2
+        nbd_client_receive_next_request(client);
a83cc2
     }
a83cc2
 }
a83cc2
 
a83cc2
-static void blk_aio_detach(void *opaque)
a83cc2
+static bool nbd_drained_poll(void *opaque)
a83cc2
 {
a83cc2
     NBDExport *exp = opaque;
a83cc2
+    NBDClient *client;
a83cc2
 
a83cc2
-    trace_nbd_blk_aio_detach(exp->name, exp->common.ctx);
a83cc2
+    QTAILQ_FOREACH(client, &exp->clients, next) {
a83cc2
+        if (client->nb_requests != 0) {
a83cc2
+            /*
a83cc2
+             * If there's a coroutine waiting for a request on nbd_read_eof()
a83cc2
+             * enter it here so we don't depend on the client to wake it up.
a83cc2
+             */
a83cc2
+            if (client->recv_coroutine != NULL && client->read_yielding) {
a83cc2
+                qemu_aio_coroutine_enter(exp->common.ctx,
a83cc2
+                                         client->recv_coroutine);
a83cc2
+            }
a83cc2
 
a83cc2
-    aio_wait_bh_oneshot(exp->common.ctx, nbd_aio_detach_bh, exp);
a83cc2
+            return true;
a83cc2
+        }
a83cc2
+    }
a83cc2
 
a83cc2
-    exp->common.ctx = NULL;
a83cc2
+    return false;
a83cc2
 }
a83cc2
 
a83cc2
 static void nbd_eject_notifier(Notifier *n, void *data)
a83cc2
@@ -1594,6 +1618,12 @@ void nbd_export_set_on_eject_blk(BlockExport *exp, BlockBackend *blk)
a83cc2
     blk_add_remove_bs_notifier(blk, &nbd_exp->eject_notifier);
a83cc2
 }
a83cc2
 
a83cc2
+static const BlockDevOps nbd_block_ops = {
a83cc2
+    .drained_begin = nbd_drained_begin,
a83cc2
+    .drained_end = nbd_drained_end,
a83cc2
+    .drained_poll = nbd_drained_poll,
a83cc2
+};
a83cc2
+
a83cc2
 static int nbd_export_create(BlockExport *blk_exp, BlockExportOptions *exp_args,
a83cc2
                              Error **errp)
a83cc2
 {
a83cc2
@@ -1715,8 +1745,17 @@ static int nbd_export_create(BlockExport *blk_exp, BlockExportOptions *exp_args,
a83cc2
 
a83cc2
     exp->allocation_depth = arg->allocation_depth;
a83cc2
 
a83cc2
+    /*
a83cc2
+     * We need to inhibit request queuing in the block layer to ensure we can
a83cc2
+     * be properly quiesced when entering a drained section, as our coroutines
a83cc2
+     * servicing pending requests might enter blk_pread().
a83cc2
+     */
a83cc2
+    blk_set_disable_request_queuing(blk, true);
a83cc2
+
a83cc2
     blk_add_aio_context_notifier(blk, blk_aio_attached, blk_aio_detach, exp);
a83cc2
 
a83cc2
+    blk_set_dev_ops(blk, &nbd_block_ops, exp);
a83cc2
+
a83cc2
     QTAILQ_INSERT_TAIL(&exports, exp, next);
a83cc2
 
a83cc2
     return 0;
a83cc2
@@ -1788,6 +1827,7 @@ static void nbd_export_delete(BlockExport *blk_exp)
a83cc2
         }
a83cc2
         blk_remove_aio_context_notifier(exp->common.blk, blk_aio_attached,
a83cc2
                                         blk_aio_detach, exp);
a83cc2
+        blk_set_disable_request_queuing(exp->common.blk, false);
a83cc2
     }
a83cc2
 
a83cc2
     for (i = 0; i < exp->nr_export_bitmaps; i++) {
a83cc2
-- 
a83cc2
2.27.0
a83cc2