From 345107bfd5537b51f34aaeb97d6161858bb6feee Mon Sep 17 00:00:00 2001
From: Kevin Wolf <kwolf@redhat.com>
Date: Tue, 10 May 2022 17:10:20 +0200
Subject: [PATCH 08/16] coroutine: Revert to constant batch size
RH-Author: Kevin Wolf <kwolf@redhat.com>
RH-MergeRequest: 87: coroutine: Fix crashes due to too large pool batch size
RH-Commit: [2/2] 8a8a39af873854cdc8333d1a70f3479a97c3ec7a (kmwolf/centos-qemu-kvm)
RH-Bugzilla: 2079938
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
RH-Acked-by: Stefano Garzarella <sgarzare@redhat.com>
Commit 4c41c69e changed the way the coroutine pool is sized because for
virtio-blk devices with a large queue size and heavy I/O, it was just
too small and caused coroutines to be deleted and reallocated soon
afterwards. The change made the size dynamic based on the number of
queues and the queue size of virtio-blk devices.
There are two important numbers here: Slightly simplified, when a
coroutine terminates, it is generally stored in the global release pool
up to a certain pool size, and if the pool is full, it is freed.
Conversely, when allocating a new coroutine, the coroutines in the
release pool are reused if the pool has already reached a certain
minimum size (the batch size); otherwise we allocate a new coroutine.
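
To make the two numbers concrete, here is a minimal, self-contained
model of the pool logic as this patch arranges it (illustrative only;
the real code in util/qemu-coroutine.c uses atomics, a per-thread
alloc_pool, and moves coroutines over in batches):

    #include <stdlib.h>

    typedef struct Coroutine {
        struct Coroutine *next;
    } Coroutine;

    enum { POOL_MIN_BATCH_SIZE = 64 };    /* reuse threshold */

    static Coroutine *release_pool;       /* global free list */
    static unsigned release_pool_size;
    static unsigned pool_max_size = 64;   /* raised on demand */

    static Coroutine *coroutine_get(void)
    {
        /* Reuse only once enough coroutines have accumulated. */
        if (release_pool_size > POOL_MIN_BATCH_SIZE) {
            Coroutine *co = release_pool;
            release_pool = co->next;
            release_pool_size--;
            return co;
        }
        return calloc(1, sizeof(Coroutine));   /* allocate fresh */
    }

    static void coroutine_put(Coroutine *co)
    {
        /* Keep terminated coroutines up to the pool size limit. */
        if (release_pool_size < pool_max_size * 2) {
            co->next = release_pool;
            release_pool = co;
            release_pool_size++;
        } else {
            free(co);   /* pool full: really free it */
        }
    }

    int main(void)
    {
        coroutine_put(coroutine_get());   /* one alloc/free cycle */
        return 0;
    }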
The problem after commit 4c41c69e is that it not only increases the
maximum pool size (which is the intended effect), but also the batch
size for reusing coroutines (which is a bug). It means that in cases
with many devices and/or a large queue size (which defaults to the
number of vcpus for virtio-blk-pci), many thousands of coroutines could be
sitting in the release pool without being reused.
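
For a sense of scale (purely illustrative figures, not taken from this
commit): if a single device with 16 queues of 256 entries each raised
the batch size by 16 * 256 = 4096, no coroutine would be reused until
more than 4096 of them were already sitting in the release pool.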
This is not only a waste of memory and allocations, but it actually
makes the QEMU process likely to hit the vm.max_map_count limit on Linux
because each coroutine requires two mappings (its stack and the guard
page for the stack), causing it to abort() in qemu_alloc_stack() because
when the limit is hit, mprotect() starts to fail with ENOMEM.
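
The failure mode can be sketched with a stand-alone program
(hypothetical code, not QEMU's actual qemu_alloc_stack()): the
mprotect() call splits the anonymous mapping into a guard page plus
the usable stack, so every live coroutine stack costs two entries
towards vm.max_map_count, and the split itself is what fails with
ENOMEM once the limit is reached:

    #include <stdio.h>
    #include <stdlib.h>
    #include <sys/mman.h>
    #include <unistd.h>

    static void *alloc_stack(size_t size)
    {
        size_t pgsz = sysconf(_SC_PAGESIZE);
        void *ptr = mmap(NULL, size + pgsz, PROT_READ | PROT_WRITE,
                         MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
        if (ptr == MAP_FAILED) {
            abort();
        }
        /* Make the first page inaccessible: this splits one mapping
         * into two VMAs. At vm.max_map_count, it fails with ENOMEM. */
        if (mprotect(ptr, pgsz, PROT_NONE) != 0) {
            perror("mprotect");
            abort();   /* analogous to the abort() described above */
        }
        return (char *)ptr + pgsz;   /* stack begins after the guard */
    }

    int main(void)
    {
        void *stack = alloc_stack(1 << 20);
        printf("usable stack at %p\n", stack);
        return 0;
    }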
In order to fix the problem, change the batch size back to 64 to avoid
uselessly accumulating coroutines in the release pool, but keep the
dynamic maximum pool size so that coroutines aren't freed too early
in heavy I/O scenarios.
Note that this fix doesn't strictly make it impossible to hit the limit,
but this would only happen if most of the coroutines are actually in use
at the same time, not just sitting in a pool. This is the same behaviour
as we already had before commit 4c41c69e. Fully preventing this would
require allowing qemu_coroutine_create() to return an error, but it
doesn't seem to be a scenario that people hit in practice.
Cc: qemu-stable@nongnu.org
Resolves: https://bugzilla.redhat.com/show_bug.cgi?id=2079938
Fixes: 4c41c69e05fe28c0f95f8abd2ebf407e95a4f04b
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Message-Id: <20220510151020.105528-3-kwolf@redhat.com>
Tested-by: Hiroki Narukawa <hnarukaw@yahoo-corp.jp>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
(cherry picked from commit 9ec7a59b5aad4b736871c378d30f5ef5ec51cb52)
Conflicts:
	util/qemu-coroutine.c
Trivial merge conflict because we don't have commit ac387a08 downstream.
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 util/qemu-coroutine.c | 22 ++++++++++++++--------
 1 file changed, 14 insertions(+), 8 deletions(-)
diff --git a/util/qemu-coroutine.c b/util/qemu-coroutine.c
index faca0ca97c..804f672e0a 100644
--- a/util/qemu-coroutine.c
+++ b/util/qemu-coroutine.c
@@ -20,14 +20,20 @@
 #include "qemu/coroutine_int.h"
 #include "block/aio.h"
 
-/** Initial batch size is 64, and is increased on demand */
+/**
+ * The minimal batch size is always 64, coroutines from the release_pool are
+ * reused as soon as there are 64 coroutines in it. The maximum pool size starts
+ * with 64 and is increased on demand so that coroutines are not deleted even if
+ * they are not immediately reused.
+ */
 enum {
-    POOL_INITIAL_BATCH_SIZE = 64,
+    POOL_MIN_BATCH_SIZE = 64,
+    POOL_INITIAL_MAX_SIZE = 64,
 };
 
 /** Free list to speed up creation */
 static QSLIST_HEAD(, Coroutine) release_pool = QSLIST_HEAD_INITIALIZER(pool);
-static unsigned int pool_batch_size = POOL_INITIAL_BATCH_SIZE;
+static unsigned int pool_max_size = POOL_INITIAL_MAX_SIZE;
 static unsigned int release_pool_size;
 static __thread QSLIST_HEAD(, Coroutine) alloc_pool = QSLIST_HEAD_INITIALIZER(pool);
 static __thread unsigned int alloc_pool_size;
@@ -51,7 +57,7 @@ Coroutine *qemu_coroutine_create(CoroutineEntry *entry, void *opaque)
     if (CONFIG_COROUTINE_POOL) {
         co = QSLIST_FIRST(&alloc_pool);
         if (!co) {
-            if (release_pool_size > qatomic_read(&pool_batch_size)) {
+            if (release_pool_size > POOL_MIN_BATCH_SIZE) {
                 /* Slow path; a good place to register the destructor, too.  */
                 if (!coroutine_pool_cleanup_notifier.notify) {
                     coroutine_pool_cleanup_notifier.notify = coroutine_pool_cleanup;
@@ -88,12 +94,12 @@ static void coroutine_delete(Coroutine *co)
     co->caller = NULL;
 
     if (CONFIG_COROUTINE_POOL) {
-        if (release_pool_size < qatomic_read(&pool_batch_size) * 2) {
+        if (release_pool_size < qatomic_read(&pool_max_size) * 2) {
             QSLIST_INSERT_HEAD_ATOMIC(&release_pool, co, pool_next);
             qatomic_inc(&release_pool_size);
             return;
         }
-        if (alloc_pool_size < qatomic_read(&pool_batch_size)) {
+        if (alloc_pool_size < qatomic_read(&pool_max_size)) {
             QSLIST_INSERT_HEAD(&alloc_pool, co, pool_next);
             alloc_pool_size++;
             return;
@@ -207,10 +213,10 @@ AioContext *coroutine_fn qemu_coroutine_get_aio_context(Coroutine *co)
 
 void qemu_coroutine_inc_pool_size(unsigned int additional_pool_size)
 {
-    qatomic_add(&pool_batch_size, additional_pool_size);
+    qatomic_add(&pool_max_size, additional_pool_size);
 }
 
 void qemu_coroutine_dec_pool_size(unsigned int removing_pool_size)
 {
-    qatomic_sub(&pool_batch_size, removing_pool_size);
+    qatomic_sub(&pool_max_size, removing_pool_size);
 }
-- 
2.31.1