586cba
From 9c2e55d25fec6ffb21e344513b7dbeed7e21f641 Mon Sep 17 00:00:00 2001
586cba
From: Stefan Hajnoczi <stefanha@redhat.com>
586cba
Date: Tue, 17 May 2022 12:08:04 +0100
586cba
Subject: [PATCH 2/6] coroutine: use QEMU_DEFINE_STATIC_CO_TLS()
586cba
MIME-Version: 1.0
586cba
Content-Type: text/plain; charset=UTF-8
586cba
Content-Transfer-Encoding: 8bit
586cba
586cba
RH-Author: Stefan Hajnoczi <stefanha@redhat.com>
586cba
RH-MergeRequest: 89: coroutine: use coroutine TLS macros to protect thread-local variables
586cba
RH-Commit: [2/3] 68a8847e406e2eace6ddc31b0c5676a60600d606 (stefanha/centos-stream-qemu-kvm)
586cba
RH-Bugzilla: 1952483
586cba
RH-Acked-by: Hanna Reitz <hreitz@redhat.com>
586cba
RH-Acked-by: Eric Blake <eblake@redhat.com>
586cba
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
586cba
586cba
Thread-Local Storage variables cannot be used directly from coroutine
586cba
code because the compiler may optimize TLS variable accesses across
586cba
qemu_coroutine_yield() calls. When the coroutine is re-entered from
586cba
another thread the TLS variables from the old thread must no longer be
586cba
used.
586cba
586cba
Use QEMU_DEFINE_STATIC_CO_TLS() for the coroutine pool TLS variables.
586cba
The alloc_pool QSLIST needs a typedef so the return value of
586cba
get_ptr_alloc_pool() can be stored in a local variable.
586cba
586cba
One example of why this code is necessary: a coroutine that yields
586cba
before calling qemu_coroutine_create() to create another coroutine is
586cba
affected by the TLS issue.
586cba
586cba
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
586cba
Message-Id: <20220307153853.602859-3-stefanha@redhat.com>
586cba
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
586cba
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
586cba
(cherry picked from commit ac387a08a9c9f6b36757da912f0339c25f421f90)
586cba
586cba
Conflicts:
586cba
- Context conflicts due to commit 5411171c3ef4 ("coroutine: Revert to
586cba
  constant batch size").
586cba
586cba
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
586cba
---
586cba
 util/qemu-coroutine.c | 41 ++++++++++++++++++++++++-----------------
586cba
 1 file changed, 24 insertions(+), 17 deletions(-)
586cba
586cba
diff --git a/util/qemu-coroutine.c b/util/qemu-coroutine.c
586cba
index 804f672e0a..4a8bd63ef0 100644
586cba
--- a/util/qemu-coroutine.c
586cba
+++ b/util/qemu-coroutine.c
586cba
@@ -18,6 +18,7 @@
586cba
 #include "qemu/atomic.h"
586cba
 #include "qemu/coroutine.h"
586cba
 #include "qemu/coroutine_int.h"
586cba
+#include "qemu/coroutine-tls.h"
586cba
 #include "block/aio.h"
586cba
 
586cba
 /**
586cba
@@ -35,17 +36,20 @@ enum {
586cba
 static QSLIST_HEAD(, Coroutine) release_pool = QSLIST_HEAD_INITIALIZER(pool);
586cba
 static unsigned int pool_max_size = POOL_INITIAL_MAX_SIZE;
586cba
 static unsigned int release_pool_size;
586cba
-static __thread QSLIST_HEAD(, Coroutine) alloc_pool = QSLIST_HEAD_INITIALIZER(pool);
586cba
-static __thread unsigned int alloc_pool_size;
586cba
-static __thread Notifier coroutine_pool_cleanup_notifier;
586cba
+
586cba
+typedef QSLIST_HEAD(, Coroutine) CoroutineQSList;
586cba
+QEMU_DEFINE_STATIC_CO_TLS(CoroutineQSList, alloc_pool);
586cba
+QEMU_DEFINE_STATIC_CO_TLS(unsigned int, alloc_pool_size);
586cba
+QEMU_DEFINE_STATIC_CO_TLS(Notifier, coroutine_pool_cleanup_notifier);
586cba
 
586cba
 static void coroutine_pool_cleanup(Notifier *n, void *value)
586cba
 {
586cba
     Coroutine *co;
586cba
     Coroutine *tmp;
586cba
+    CoroutineQSList *alloc_pool = get_ptr_alloc_pool();
586cba
 
586cba
-    QSLIST_FOREACH_SAFE(co, &alloc_pool, pool_next, tmp) {
586cba
-        QSLIST_REMOVE_HEAD(&alloc_pool, pool_next);
586cba
+    QSLIST_FOREACH_SAFE(co, alloc_pool, pool_next, tmp) {
586cba
+        QSLIST_REMOVE_HEAD(alloc_pool, pool_next);
586cba
         qemu_coroutine_delete(co);
586cba
     }
586cba
 }
586cba
@@ -55,27 +59,30 @@ Coroutine *qemu_coroutine_create(CoroutineEntry *entry, void *opaque)
586cba
     Coroutine *co = NULL;
586cba
 
586cba
     if (CONFIG_COROUTINE_POOL) {
586cba
-        co = QSLIST_FIRST(&alloc_pool);
586cba
+        CoroutineQSList *alloc_pool = get_ptr_alloc_pool();
586cba
+
586cba
+        co = QSLIST_FIRST(alloc_pool);
586cba
         if (!co) {
586cba
             if (release_pool_size > POOL_MIN_BATCH_SIZE) {
586cba
                 /* Slow path; a good place to register the destructor, too.  */
586cba
-                if (!coroutine_pool_cleanup_notifier.notify) {
586cba
-                    coroutine_pool_cleanup_notifier.notify = coroutine_pool_cleanup;
586cba
-                    qemu_thread_atexit_add(&coroutine_pool_cleanup_notifier);
586cba
+                Notifier *notifier = get_ptr_coroutine_pool_cleanup_notifier();
586cba
+                if (!notifier->notify) {
586cba
+                    notifier->notify = coroutine_pool_cleanup;
586cba
+                    qemu_thread_atexit_add(notifier);
586cba
                 }
586cba
 
586cba
                 /* This is not exact; there could be a little skew between
586cba
                  * release_pool_size and the actual size of release_pool.  But
586cba
                  * it is just a heuristic, it does not need to be perfect.
586cba
                  */
586cba
-                alloc_pool_size = qatomic_xchg(&release_pool_size, 0);
586cba
-                QSLIST_MOVE_ATOMIC(&alloc_pool, &release_pool);
586cba
-                co = QSLIST_FIRST(&alloc_pool);
586cba
+                set_alloc_pool_size(qatomic_xchg(&release_pool_size, 0));
586cba
+                QSLIST_MOVE_ATOMIC(alloc_pool, &release_pool);
586cba
+                co = QSLIST_FIRST(alloc_pool);
586cba
             }
586cba
         }
586cba
         if (co) {
586cba
-            QSLIST_REMOVE_HEAD(&alloc_pool, pool_next);
586cba
-            alloc_pool_size--;
586cba
+            QSLIST_REMOVE_HEAD(alloc_pool, pool_next);
586cba
+            set_alloc_pool_size(get_alloc_pool_size() - 1);
586cba
         }
586cba
     }
586cba
 
586cba
@@ -99,9 +106,9 @@ static void coroutine_delete(Coroutine *co)
586cba
             qatomic_inc(&release_pool_size);
586cba
             return;
586cba
         }
586cba
-        if (alloc_pool_size < qatomic_read(&pool_max_size)) {
586cba
-            QSLIST_INSERT_HEAD(&alloc_pool, co, pool_next);
586cba
-            alloc_pool_size++;
586cba
+        if (get_alloc_pool_size() < qatomic_read(&pool_max_size)) {
586cba
+            QSLIST_INSERT_HEAD(get_ptr_alloc_pool(), co, pool_next);
586cba
+            set_alloc_pool_size(get_alloc_pool_size() + 1);
586cba
             return;
586cba
         }
586cba
     }
586cba
-- 
586cba
2.31.1
586cba