6e7d01
From 82a02aec3a8b3c2ac925d0b71ea4c35aa5d6463b Mon Sep 17 00:00:00 2001
6e7d01
From: Andrew Jones <drjones@redhat.com>
6e7d01
Date: Wed, 4 Aug 2021 03:27:24 -0400
6e7d01
Subject: [PATCH 2/2] async: use explicit memory barriers
6e7d01
6e7d01
RH-Author: Andrew Jones <drjones@redhat.com>
6e7d01
Message-id: <20210729134448.4995-3-drjones@redhat.com>
6e7d01
Patchwork-id: 101937
6e7d01
O-Subject: [RHEL-8.5.0 qemu-kvm PATCH v2 2/2] async: use explicit memory barriers
6e7d01
Bugzilla: 1969848
6e7d01
RH-Acked-by: Gavin Shan <gshan@redhat.com>
6e7d01
RH-Acked-by: Auger Eric <eric.auger@redhat.com>
6e7d01
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
6e7d01
6e7d01
From: Paolo Bonzini <pbonzini@redhat.com>
6e7d01
6e7d01
When using C11 atomics, non-seqcst reads and writes do not participate
6e7d01
in the total order of seqcst operations.  In util/async.c and util/aio-posix.c,
6e7d01
in particular, the pattern that we use
6e7d01
6e7d01
          write ctx->notify_me                 write bh->scheduled
6e7d01
          read bh->scheduled                   read ctx->notify_me
6e7d01
          if !bh->scheduled, sleep             if ctx->notify_me, notify
6e7d01
6e7d01
needs to use seqcst operations for both the write and the read.  In
6e7d01
general this is something that we do not want, because there can be
6e7d01
many sources that are polled in addition to bottom halves.  The
6e7d01
alternative is to place a seqcst memory barrier between the write
6e7d01
and the read.  This also comes with a disadvantage, in that the
6e7d01
memory barrier is implicit on strongly-ordered architectures and
6e7d01
it wastes a few dozen clock cycles.
6e7d01
6e7d01
Fortunately, ctx->notify_me is never written concurrently by two
6e7d01
threads, so we can assert that and relax the writes to ctx->notify_me.
6e7d01
The resulting solution works and performs well on both aarch64 and x86.
6e7d01
6e7d01
Note that the atomic_set/atomic_read combination is not an atomic
6e7d01
read-modify-write, and therefore it is even weaker than C11 ATOMIC_RELAXED;
6e7d01
on x86, ATOMIC_RELAXED compiles to a locked operation.
6e7d01
6e7d01
Analyzed-by: Ying Fang <fangying1@huawei.com>
6e7d01
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
6e7d01
Tested-by: Ying Fang <fangying1@huawei.com>
6e7d01
Message-Id: <20200407140746.8041-6-pbonzini@redhat.com>
6e7d01
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
6e7d01
(cherry picked from commit 5710a3e09f9b85801e5ce70797a4a511e5fc9e2c)
6e7d01
Signed-off-by: Andrew Jones <drjones@redhat.com>
6e7d01
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
6e7d01
---
6e7d01
 util/aio-posix.c | 16 ++++++++++++++--
6e7d01
 util/aio-win32.c | 17 ++++++++++++++---
6e7d01
 util/async.c     | 16 ++++++++++++----
6e7d01
 3 files changed, 40 insertions(+), 9 deletions(-)
6e7d01
6e7d01
diff --git a/util/aio-posix.c b/util/aio-posix.c
6e7d01
index abc396d030..8cfb25650d 100644
6e7d01
--- a/util/aio-posix.c
6e7d01
+++ b/util/aio-posix.c
6e7d01
@@ -624,6 +624,11 @@ bool aio_poll(AioContext *ctx, bool blocking)
6e7d01
     int64_t timeout;
6e7d01
     int64_t start = 0;
6e7d01
 
6e7d01
+    /*
6e7d01
+     * There cannot be two concurrent aio_poll calls for the same AioContext (or
6e7d01
+     * an aio_poll concurrent with a GSource prepare/check/dispatch callback).
6e7d01
+     * We rely on this below to avoid slow locked accesses to ctx->notify_me.
6e7d01
+     */
6e7d01
     assert(in_aio_context_home_thread(ctx));
6e7d01
 
6e7d01
     /* aio_notify can avoid the expensive event_notifier_set if
6e7d01
@@ -634,7 +639,13 @@ bool aio_poll(AioContext *ctx, bool blocking)
6e7d01
      * so disable the optimization now.
6e7d01
      */
6e7d01
     if (blocking) {
6e7d01
-        atomic_add(&ctx->notify_me, 2);
6e7d01
+        atomic_set(&ctx->notify_me, atomic_read(&ctx->notify_me) + 2);
6e7d01
+        /*
6e7d01
+         * Write ctx->notify_me before computing the timeout
6e7d01
+         * (reading bottom half flags, etc.).  Pairs with
6e7d01
+         * smp_mb in aio_notify().
6e7d01
+         */
6e7d01
+        smp_mb();
6e7d01
     }
6e7d01
 
6e7d01
     qemu_lockcnt_inc(&ctx->list_lock);
6e7d01
@@ -679,7 +690,8 @@ bool aio_poll(AioContext *ctx, bool blocking)
6e7d01
     }
6e7d01
 
6e7d01
     if (blocking) {
6e7d01
-        atomic_sub(&ctx->notify_me, 2);
6e7d01
+        /* Finish the poll before clearing the flag.  */
6e7d01
+        atomic_store_release(&ctx->notify_me, atomic_read(&ctx->notify_me) - 2);
6e7d01
         aio_notify_accept(ctx);
6e7d01
     }
6e7d01
 
6e7d01
diff --git a/util/aio-win32.c b/util/aio-win32.c
6e7d01
index a23b9c364d..729d533faf 100644
6e7d01
--- a/util/aio-win32.c
6e7d01
+++ b/util/aio-win32.c
6e7d01
@@ -321,6 +321,12 @@ bool aio_poll(AioContext *ctx, bool blocking)
6e7d01
     int count;
6e7d01
     int timeout;
6e7d01
 
6e7d01
+    /*
6e7d01
+     * There cannot be two concurrent aio_poll calls for the same AioContext (or
6e7d01
+     * an aio_poll concurrent with a GSource prepare/check/dispatch callback).
6e7d01
+     * We rely on this below to avoid slow locked accesses to ctx->notify_me.
6e7d01
+     */
6e7d01
+    assert(in_aio_context_home_thread(ctx));
6e7d01
     progress = false;
6e7d01
 
6e7d01
     /* aio_notify can avoid the expensive event_notifier_set if
6e7d01
@@ -331,7 +337,13 @@ bool aio_poll(AioContext *ctx, bool blocking)
6e7d01
      * so disable the optimization now.
6e7d01
      */
6e7d01
     if (blocking) {
6e7d01
-        atomic_add(&ctx->notify_me, 2);
6e7d01
+        atomic_set(&ctx->notify_me, atomic_read(&ctx->notify_me) + 2);
6e7d01
+        /*
6e7d01
+         * Write ctx->notify_me before computing the timeout
6e7d01
+         * (reading bottom half flags, etc.).  Pairs with
6e7d01
+         * smp_mb in aio_notify().
6e7d01
+         */
6e7d01
+        smp_mb();
6e7d01
     }
6e7d01
 
6e7d01
     qemu_lockcnt_inc(&ctx->list_lock);
6e7d01
@@ -364,8 +376,7 @@ bool aio_poll(AioContext *ctx, bool blocking)
6e7d01
         ret = WaitForMultipleObjects(count, events, FALSE, timeout);
6e7d01
         if (blocking) {
6e7d01
             assert(first);
6e7d01
-            assert(in_aio_context_home_thread(ctx));
6e7d01
-            atomic_sub(&ctx->notify_me, 2);
6e7d01
+            atomic_store_release(&ctx->notify_me, atomic_read(&ctx->notify_me) - 2);
6e7d01
             aio_notify_accept(ctx);
6e7d01
         }
6e7d01
 
6e7d01
diff --git a/util/async.c b/util/async.c
6e7d01
index b1fa5319e5..c65c58bbc9 100644
6e7d01
--- a/util/async.c
6e7d01
+++ b/util/async.c
6e7d01
@@ -220,7 +220,14 @@ aio_ctx_prepare(GSource *source, gint    *timeout)
6e7d01
 {
6e7d01
     AioContext *ctx = (AioContext *) source;
6e7d01
 
6e7d01
-    atomic_or(&ctx->notify_me, 1);
6e7d01
+    atomic_set(&ctx->notify_me, atomic_read(&ctx->notify_me) | 1);
6e7d01
+
6e7d01
+    /*
6e7d01
+     * Write ctx->notify_me before computing the timeout
6e7d01
+     * (reading bottom half flags, etc.).  Pairs with
6e7d01
+     * smp_mb in aio_notify().
6e7d01
+     */
6e7d01
+    smp_mb();
6e7d01
 
6e7d01
     /* We assume there is no timeout already supplied */
6e7d01
     *timeout = qemu_timeout_ns_to_ms(aio_compute_timeout(ctx));
6e7d01
@@ -238,7 +245,8 @@ aio_ctx_check(GSource *source)
6e7d01
     AioContext *ctx = (AioContext *) source;
6e7d01
     QEMUBH *bh;
6e7d01
 
6e7d01
-    atomic_and(&ctx->notify_me, ~1);
6e7d01
+    /* Finish computing the timeout before clearing the flag.  */
6e7d01
+    atomic_store_release(&ctx->notify_me, atomic_read(&ctx->notify_me) & ~1);
6e7d01
     aio_notify_accept(ctx);
6e7d01
 
6e7d01
     for (bh = ctx->first_bh; bh; bh = bh->next) {
6e7d01
@@ -343,10 +351,10 @@ LinuxAioState *aio_get_linux_aio(AioContext *ctx)
6e7d01
 void aio_notify(AioContext *ctx)
6e7d01
 {
6e7d01
     /* Write e.g. bh->scheduled before reading ctx->notify_me.  Pairs
6e7d01
-     * with atomic_or in aio_ctx_prepare or atomic_add in aio_poll.
6e7d01
+     * with smp_mb in aio_ctx_prepare or aio_poll.
6e7d01
      */
6e7d01
     smp_mb();
6e7d01
-    if (ctx->notify_me) {
6e7d01
+    if (atomic_read(&ctx->notify_me)) {
6e7d01
         event_notifier_set(&ctx->notifier);
6e7d01
         atomic_mb_set(&ctx->notified, true);
6e7d01
     }
6e7d01
-- 
6e7d01
2.27.0
6e7d01