From 345107bfd5537b51f34aaeb97d6161858bb6feee Mon Sep 17 00:00:00 2001
From: Kevin Wolf <kwolf@redhat.com>
Date: Tue, 10 May 2022 17:10:20 +0200
Subject: [PATCH 08/16] coroutine: Revert to constant batch size

RH-Author: Kevin Wolf <kwolf@redhat.com>
RH-MergeRequest: 87: coroutine: Fix crashes due to too large pool batch size
RH-Commit: [2/2] 8a8a39af873854cdc8333d1a70f3479a97c3ec7a (kmwolf/centos-qemu-kvm)
RH-Bugzilla: 2079938
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
RH-Acked-by: Stefano Garzarella <sgarzare@redhat.com>

Commit 4c41c69e changed the way the coroutine pool is sized because for
virtio-blk devices with a large queue size and heavy I/O, it was just
too small and caused coroutines to be deleted and reallocated soon
afterwards. The change made the size dynamic, based on the number of
queues and the queue size of virtio-blk devices.

There are two important numbers here. Slightly simplified: when a
coroutine terminates, it is generally stored in the global release
pool, up to a certain pool size, and if the pool is full, it is freed.
Conversely, when allocating a new coroutine, the coroutines in the
release pool are reused if the pool has already reached a certain
minimum size (the batch size); otherwise, new coroutines are allocated.
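
For illustration, the two numbers gate two independent decisions. The
sketch below is a simplified, standalone model, not the actual code in
util/qemu-coroutine.c (which additionally keeps a per-thread alloc_pool
and uses atomic counters); the function names are made up for this
example:

    /* Illustrative model of the two pool limits. */
    enum { BATCH_SIZE = 64 };            /* reuse threshold             */
    static unsigned max_pool_size = 64;  /* storage limit, can grow     */
    static unsigned release_pool_size;   /* coroutines currently pooled */

    /* Allocation: dip into the release pool only once it holds a full
     * batch; otherwise allocate a fresh coroutine. */
    static int create_reuses_pooled_coroutine(void)
    {
        if (release_pool_size > BATCH_SIZE) {
            release_pool_size--;         /* reuse a pooled coroutine */
            return 1;
        }
        return 0;                        /* allocate a new one */
    }

    /* Termination: park the coroutine unless the pool is already full. */
    static int delete_keeps_coroutine(void)
    {
        if (release_pool_size < max_pool_size) {
            release_pool_size++;         /* keep it for later reuse */
            return 1;
        }
        return 0;                        /* really free it */
    }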

The problem after commit 4c41c69e is that it not only increases the
maximum pool size (which is the intended effect), but also the batch
size for reusing coroutines (which is a bug). This means that in cases
with many devices and/or a large queue size (which defaults to the
number of vcpus for virtio-blk-pci), many thousands of coroutines could
be sitting in the release pool without being reused.
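
For a purely illustrative sense of scale (the exact per-device
contribution depends on the virtio-blk configuration), a hypothetical
setup with 8 such devices, 8 queues each and a queue size of 256 puts
the dynamic value on the order of

    8 devices * 8 queues * 256 entries per queue = 16384 coroutines

that can accumulate in the release pool without ever being reused.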

This is not only a waste of memory and allocations; it also makes the
QEMU process likely to hit the vm.max_map_count limit on Linux, because
each coroutine requires two mappings (its stack and the guard page for
the stack). Once the limit is hit, mprotect() starts to fail with
ENOMEM, causing QEMU to abort() in qemu_alloc_stack().
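
The sketch below shows where the two mappings per coroutine come from
and where the failure surfaces; it is modelled loosely on
qemu_alloc_stack() and simplified (sizes, flags and error handling
differ in the real code):

    #include <stdlib.h>
    #include <unistd.h>
    #include <sys/mman.h>

    static void *alloc_coroutine_stack(size_t sz)
    {
        size_t pagesz = (size_t)sysconf(_SC_PAGESIZE);

        /* First mapping: one VMA for guard page plus stack. */
        void *ptr = mmap(NULL, sz + pagesz, PROT_READ | PROT_WRITE,
                         MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
        if (ptr == MAP_FAILED) {
            abort();
        }

        /* Second mapping: making the guard page inaccessible splits the
         * VMA in two. At vm.max_map_count this split fails with ENOMEM,
         * and the abort() below is the reported crash. */
        if (mprotect(ptr, pagesz, PROT_NONE) != 0) {
            abort();
        }
        return (char *)ptr + pagesz;    /* usable stack starts here */
    }

With the usual Linux default of vm.max_map_count = 65530, roughly 32k
coroutines (two mappings each) are enough to exhaust the limit, even
before counting the process's other mappings.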

In order to fix the problem, change the batch size back to 64 to avoid
uselessly accumulating coroutines in the release pool, but keep the
dynamic maximum pool size so that coroutines aren't freed too early
in heavy I/O scenarios.

Note that this fix doesn't strictly make it impossible to hit the
limit, but this would only happen if most of the coroutines were
actually in use at the same time, not just sitting in a pool. This is
the same behaviour as we already had before commit 4c41c69e. Fully
preventing this would require allowing qemu_coroutine_create() to
return an error, but it doesn't seem to be a scenario that people hit
in practice.

Cc: qemu-stable@nongnu.org
Resolves: https://bugzilla.redhat.com/show_bug.cgi?id=2079938
Fixes: 4c41c69e05fe28c0f95f8abd2ebf407e95a4f04b
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Message-Id: <20220510151020.105528-3-kwolf@redhat.com>
Tested-by: Hiroki Narukawa <hnarukaw@yahoo-corp.jp>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
(cherry picked from commit 9ec7a59b5aad4b736871c378d30f5ef5ec51cb52)

Conflicts:
	util/qemu-coroutine.c

Trivial merge conflict because we don't have commit ac387a08 downstream.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 util/qemu-coroutine.c | 22 ++++++++++++++--------
 1 file changed, 14 insertions(+), 8 deletions(-)

diff --git a/util/qemu-coroutine.c b/util/qemu-coroutine.c
index faca0ca97c..804f672e0a 100644
--- a/util/qemu-coroutine.c
+++ b/util/qemu-coroutine.c
@@ -20,14 +20,20 @@
 #include "qemu/coroutine_int.h"
 #include "block/aio.h"
 
-/** Initial batch size is 64, and is increased on demand */
+/**
+ * The minimal batch size is always 64, coroutines from the release_pool are
+ * reused as soon as there are 64 coroutines in it. The maximum pool size starts
+ * with 64 and is increased on demand so that coroutines are not deleted even if
+ * they are not immediately reused.
+ */
 enum {
-    POOL_INITIAL_BATCH_SIZE = 64,
+    POOL_MIN_BATCH_SIZE = 64,
+    POOL_INITIAL_MAX_SIZE = 64,
 };
 
 /** Free list to speed up creation */
 static QSLIST_HEAD(, Coroutine) release_pool = QSLIST_HEAD_INITIALIZER(pool);
-static unsigned int pool_batch_size = POOL_INITIAL_BATCH_SIZE;
+static unsigned int pool_max_size = POOL_INITIAL_MAX_SIZE;
 static unsigned int release_pool_size;
 static __thread QSLIST_HEAD(, Coroutine) alloc_pool = QSLIST_HEAD_INITIALIZER(pool);
 static __thread unsigned int alloc_pool_size;
@@ -51,7 +57,7 @@ Coroutine *qemu_coroutine_create(CoroutineEntry *entry, void *opaque)
     if (CONFIG_COROUTINE_POOL) {
         co = QSLIST_FIRST(&alloc_pool);
         if (!co) {
-            if (release_pool_size > qatomic_read(&pool_batch_size)) {
+            if (release_pool_size > POOL_MIN_BATCH_SIZE) {
                 /* Slow path; a good place to register the destructor, too. */
                 if (!coroutine_pool_cleanup_notifier.notify) {
                     coroutine_pool_cleanup_notifier.notify = coroutine_pool_cleanup;
@@ -88,12 +94,12 @@ static void coroutine_delete(Coroutine *co)
     co->caller = NULL;
 
     if (CONFIG_COROUTINE_POOL) {
-        if (release_pool_size < qatomic_read(&pool_batch_size) * 2) {
+        if (release_pool_size < qatomic_read(&pool_max_size) * 2) {
             QSLIST_INSERT_HEAD_ATOMIC(&release_pool, co, pool_next);
             qatomic_inc(&release_pool_size);
             return;
         }
-        if (alloc_pool_size < qatomic_read(&pool_batch_size)) {
+        if (alloc_pool_size < qatomic_read(&pool_max_size)) {
             QSLIST_INSERT_HEAD(&alloc_pool, co, pool_next);
             alloc_pool_size++;
             return;
@@ -207,10 +213,10 @@ AioContext *coroutine_fn qemu_coroutine_get_aio_context(Coroutine *co)
 
 void qemu_coroutine_inc_pool_size(unsigned int additional_pool_size)
 {
-    qatomic_add(&pool_batch_size, additional_pool_size);
+    qatomic_add(&pool_max_size, additional_pool_size);
 }
 
 void qemu_coroutine_dec_pool_size(unsigned int removing_pool_size)
 {
-    qatomic_sub(&pool_batch_size, removing_pool_size);
+    qatomic_sub(&pool_max_size, removing_pool_size);
 }
-- 
2.31.1