Blame SOURCES/kvm-coroutine-abort-if-we-try-to-schedule-or-enter-a-pen.patch

4a2fec
From ab88ad152dbdf6e906da07e2c165f01985edfe13 Mon Sep 17 00:00:00 2001
4a2fec
From: Jeffrey Cody <jcody@redhat.com>
4a2fec
Date: Thu, 30 Nov 2017 22:49:08 +0100
4a2fec
Subject: [PATCH 04/21] coroutine: abort if we try to schedule or enter a
4a2fec
 pending coroutine
4a2fec
4a2fec
RH-Author: Jeffrey Cody <jcody@redhat.com>
4a2fec
Message-id: <27811c58f7dd73e9d8b6f51aff42a7de9bae37fc.1511985875.git.jcody@redhat.com>
4a2fec
Patchwork-id: 78044
4a2fec
O-Subject: [RHV7.5 qemu-kvm-rhev PATCH 04/11] coroutine: abort if we try to schedule or enter a pending coroutine
4a2fec
Bugzilla: 1506531
4a2fec
RH-Acked-by: Paolo Bonzini <pbonzini@redhat.com>
4a2fec
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
4a2fec
RH-Acked-by: John Snow <jsnow@redhat.com>
4a2fec
4a2fec
The previous patch fixed a race condition, in which there were
4a2fec
coroutines being executing doubly, or after coroutine deletion.
4a2fec
4a2fec
We can detect common scenarios when this happens, and print an error
4a2fec
message and abort before we corrupt memory / data, or segfault.
4a2fec
4a2fec
This patch will abort if an attempt to enter a coroutine is made while
4a2fec
it is currently pending execution, either in a specific AioContext bh,
4a2fec
or pending execution via a timer.  It will also abort if a coroutine
4a2fec
is scheduled, before a prior scheduled run has occurred.
4a2fec
4a2fec
We cannot rely on the existing co->caller check for recursive re-entry
4a2fec
to catch this, as the coroutine may run and exit with
4a2fec
COROUTINE_TERMINATE before the scheduled coroutine executes.
4a2fec
4a2fec
(This is the scenario that was occurring and fixed in the previous
4a2fec
patch).
4a2fec
4a2fec
This patch also re-orders the Coroutine struct elements in an attempt to
4a2fec
optimize caching.
4a2fec
4a2fec
Signed-off-by: Jeff Cody <jcody@redhat.com>
4a2fec
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
4a2fec
(cherry picked from commit 6133b39f3c36623425a6ede9e89d93175fde15cd)
4a2fec
Signed-off-by: Jeff Cody <jcody@redhat.com>
4a2fec
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
4a2fec
---
4a2fec
 include/qemu/coroutine_int.h | 13 ++++++++++---
4a2fec
 util/async.c                 | 13 +++++++++++++
4a2fec
 util/qemu-coroutine-sleep.c  | 12 ++++++++++++
4a2fec
 util/qemu-coroutine.c        | 14 ++++++++++++++
4a2fec
 4 files changed, 49 insertions(+), 3 deletions(-)
4a2fec
4a2fec
diff --git a/include/qemu/coroutine_int.h b/include/qemu/coroutine_int.h
4a2fec
index cb98892..59e8406 100644
4a2fec
--- a/include/qemu/coroutine_int.h
4a2fec
+++ b/include/qemu/coroutine_int.h
4a2fec
@@ -46,14 +46,21 @@ struct Coroutine {
4a2fec
 
4a2fec
     size_t locks_held;
4a2fec
 
4a2fec
+    /* Only used when the coroutine has yielded.  */
4a2fec
+    AioContext *ctx;
4a2fec
+
4a2fec
+    /* Used to catch and abort on illegal co-routine entry.
4a2fec
+     * Will contain the name of the function that had first
4a2fec
+     * scheduled the coroutine. */
4a2fec
+    const char *scheduled;
4a2fec
+
4a2fec
+    QSIMPLEQ_ENTRY(Coroutine) co_queue_next;
4a2fec
+
4a2fec
     /* Coroutines that should be woken up when we yield or terminate.
4a2fec
      * Only used when the coroutine is running.
4a2fec
      */
4a2fec
     QSIMPLEQ_HEAD(, Coroutine) co_queue_wakeup;
4a2fec
 
4a2fec
-    /* Only used when the coroutine has yielded.  */
4a2fec
-    AioContext *ctx;
4a2fec
-    QSIMPLEQ_ENTRY(Coroutine) co_queue_next;
4a2fec
     QSLIST_ENTRY(Coroutine) co_scheduled_next;
4a2fec
 };
4a2fec
 
4a2fec
diff --git a/util/async.c b/util/async.c
4a2fec
index 0e1bd87..4dd9d95 100644
4a2fec
--- a/util/async.c
4a2fec
+++ b/util/async.c
4a2fec
@@ -388,6 +388,9 @@ static void co_schedule_bh_cb(void *opaque)
4a2fec
         QSLIST_REMOVE_HEAD(&straight, co_scheduled_next);
4a2fec
         trace_aio_co_schedule_bh_cb(ctx, co);
4a2fec
         aio_context_acquire(ctx);
4a2fec
+
4a2fec
+        /* Protected by write barrier in qemu_aio_coroutine_enter */
4a2fec
+        atomic_set(&co->scheduled, NULL);
4a2fec
         qemu_coroutine_enter(co);
4a2fec
         aio_context_release(ctx);
4a2fec
     }
4a2fec
@@ -438,6 +441,16 @@ fail:
4a2fec
 void aio_co_schedule(AioContext *ctx, Coroutine *co)
4a2fec
 {
4a2fec
     trace_aio_co_schedule(ctx, co);
4a2fec
+    const char *scheduled = atomic_cmpxchg(&co->scheduled, NULL,
4a2fec
+                                           __func__);
4a2fec
+
4a2fec
+    if (scheduled) {
4a2fec
+        fprintf(stderr,
4a2fec
+                "%s: Co-routine was already scheduled in '%s'\n",
4a2fec
+                __func__, scheduled);
4a2fec
+        abort();
4a2fec
+    }
4a2fec
+
4a2fec
     QSLIST_INSERT_HEAD_ATOMIC(&ctx->scheduled_coroutines,
4a2fec
                               co, co_scheduled_next);
4a2fec
     qemu_bh_schedule(ctx->co_schedule_bh);
4a2fec
diff --git a/util/qemu-coroutine-sleep.c b/util/qemu-coroutine-sleep.c
4a2fec
index 9c56550..254349c 100644
4a2fec
--- a/util/qemu-coroutine-sleep.c
4a2fec
+++ b/util/qemu-coroutine-sleep.c
4a2fec
@@ -13,6 +13,7 @@
4a2fec
 
4a2fec
 #include "qemu/osdep.h"
4a2fec
 #include "qemu/coroutine.h"
4a2fec
+#include "qemu/coroutine_int.h"
4a2fec
 #include "qemu/timer.h"
4a2fec
 #include "block/aio.h"
4a2fec
 
4a2fec
@@ -25,6 +26,8 @@ static void co_sleep_cb(void *opaque)
4a2fec
 {
4a2fec
     CoSleepCB *sleep_cb = opaque;
4a2fec
 
4a2fec
+    /* Write of schedule protected by barrier write in aio_co_schedule */
4a2fec
+    atomic_set(&sleep_cb->co->scheduled, NULL);
4a2fec
     aio_co_wake(sleep_cb->co);
4a2fec
 }
4a2fec
 
4a2fec
@@ -34,6 +37,15 @@ void coroutine_fn co_aio_sleep_ns(AioContext *ctx, QEMUClockType type,
4a2fec
     CoSleepCB sleep_cb = {
4a2fec
         .co = qemu_coroutine_self(),
4a2fec
     };
4a2fec
+
4a2fec
+    const char *scheduled = atomic_cmpxchg(&sleep_cb.co->scheduled, NULL,
4a2fec
+                                           __func__);
4a2fec
+    if (scheduled) {
4a2fec
+        fprintf(stderr,
4a2fec
+                "%s: Co-routine was already scheduled in '%s'\n",
4a2fec
+                __func__, scheduled);
4a2fec
+        abort();
4a2fec
+    }
4a2fec
     sleep_cb.ts = aio_timer_new(ctx, type, SCALE_NS, co_sleep_cb, &sleep_cb);
4a2fec
     timer_mod(sleep_cb.ts, qemu_clock_get_ns(type) + ns);
4a2fec
     qemu_coroutine_yield();
4a2fec
diff --git a/util/qemu-coroutine.c b/util/qemu-coroutine.c
4a2fec
index d6095c1..9eff7fd 100644
4a2fec
--- a/util/qemu-coroutine.c
4a2fec
+++ b/util/qemu-coroutine.c
4a2fec
@@ -107,8 +107,22 @@ void qemu_aio_coroutine_enter(AioContext *ctx, Coroutine *co)
4a2fec
     Coroutine *self = qemu_coroutine_self();
4a2fec
     CoroutineAction ret;
4a2fec
 
4a2fec
+    /* Cannot rely on the read barrier for co in aio_co_wake(), as there are
4a2fec
+     * callers outside of aio_co_wake() */
4a2fec
+    const char *scheduled = atomic_mb_read(&co->scheduled);
4a2fec
+
4a2fec
     trace_qemu_aio_coroutine_enter(ctx, self, co, co->entry_arg);
4a2fec
 
4a2fec
+    /* if the Coroutine has already been scheduled, entering it again will
4a2fec
+     * cause us to enter it twice, potentially even after the coroutine has
4a2fec
+     * been deleted */
4a2fec
+    if (scheduled) {
4a2fec
+        fprintf(stderr,
4a2fec
+                "%s: Co-routine was already scheduled in '%s'\n",
4a2fec
+                __func__, scheduled);
4a2fec
+        abort();
4a2fec
+    }
4a2fec
+
4a2fec
     if (co->caller) {
4a2fec
         fprintf(stderr, "Co-routine re-entered recursively\n");
4a2fec
         abort();
4a2fec
-- 
4a2fec
1.8.3.1
4a2fec