Blame SOURCES/kvm-coroutine-abort-if-we-try-to-schedule-or-enter-a-pen.patch

9bac43
From ab88ad152dbdf6e906da07e2c165f01985edfe13 Mon Sep 17 00:00:00 2001
9bac43
From: Jeffrey Cody <jcody@redhat.com>
9bac43
Date: Thu, 30 Nov 2017 22:49:08 +0100
9bac43
Subject: [PATCH 04/21] coroutine: abort if we try to schedule or enter a
9bac43
 pending coroutine
9bac43
9bac43
RH-Author: Jeffrey Cody <jcody@redhat.com>
9bac43
Message-id: <27811c58f7dd73e9d8b6f51aff42a7de9bae37fc.1511985875.git.jcody@redhat.com>
9bac43
Patchwork-id: 78044
9bac43
O-Subject: [RHV7.5 qemu-kvm-rhev PATCH 04/11] coroutine: abort if we try to schedule or enter a pending coroutine
9bac43
Bugzilla: 1506531
9bac43
RH-Acked-by: Paolo Bonzini <pbonzini@redhat.com>
9bac43
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
9bac43
RH-Acked-by: John Snow <jsnow@redhat.com>
9bac43
9bac43
The previous patch fixed a race condition, in which there were
9bac43
coroutines being executing doubly, or after coroutine deletion.
9bac43
9bac43
We can detect common scenarios when this happens, and print an error
9bac43
message and abort before we corrupt memory / data, or segfault.
9bac43
9bac43
This patch will abort if an attempt to enter a coroutine is made while
9bac43
it is currently pending execution, either in a specific AioContext bh,
9bac43
or pending execution via a timer.  It will also abort if a coroutine
9bac43
is scheduled, before a prior scheduled run has occurred.
9bac43
9bac43
We cannot rely on the existing co->caller check for recursive re-entry
9bac43
to catch this, as the coroutine may run and exit with
9bac43
COROUTINE_TERMINATE before the scheduled coroutine executes.
9bac43
9bac43
(This is the scenario that was occurring and fixed in the previous
9bac43
patch).
9bac43
9bac43
This patch also re-orders the Coroutine struct elements in an attempt to
9bac43
optimize caching.
9bac43
9bac43
Signed-off-by: Jeff Cody <jcody@redhat.com>
9bac43
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
9bac43
(cherry picked from commit 6133b39f3c36623425a6ede9e89d93175fde15cd)
9bac43
Signed-off-by: Jeff Cody <jcody@redhat.com>
9bac43
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
9bac43
---
9bac43
 include/qemu/coroutine_int.h | 13 ++++++++++---
9bac43
 util/async.c                 | 13 +++++++++++++
9bac43
 util/qemu-coroutine-sleep.c  | 12 ++++++++++++
9bac43
 util/qemu-coroutine.c        | 14 ++++++++++++++
9bac43
 4 files changed, 49 insertions(+), 3 deletions(-)
9bac43
9bac43
diff --git a/include/qemu/coroutine_int.h b/include/qemu/coroutine_int.h
9bac43
index cb98892..59e8406 100644
9bac43
--- a/include/qemu/coroutine_int.h
9bac43
+++ b/include/qemu/coroutine_int.h
9bac43
@@ -46,14 +46,21 @@ struct Coroutine {
9bac43
 
9bac43
     size_t locks_held;
9bac43
 
9bac43
+    /* Only used when the coroutine has yielded.  */
9bac43
+    AioContext *ctx;
9bac43
+
9bac43
+    /* Used to catch and abort on illegal co-routine entry.
9bac43
+     * Will contain the name of the function that had first
9bac43
+     * scheduled the coroutine. */
9bac43
+    const char *scheduled;
9bac43
+
9bac43
+    QSIMPLEQ_ENTRY(Coroutine) co_queue_next;
9bac43
+
9bac43
     /* Coroutines that should be woken up when we yield or terminate.
9bac43
      * Only used when the coroutine is running.
9bac43
      */
9bac43
     QSIMPLEQ_HEAD(, Coroutine) co_queue_wakeup;
9bac43
 
9bac43
-    /* Only used when the coroutine has yielded.  */
9bac43
-    AioContext *ctx;
9bac43
-    QSIMPLEQ_ENTRY(Coroutine) co_queue_next;
9bac43
     QSLIST_ENTRY(Coroutine) co_scheduled_next;
9bac43
 };
9bac43
 
9bac43
diff --git a/util/async.c b/util/async.c
9bac43
index 0e1bd87..4dd9d95 100644
9bac43
--- a/util/async.c
9bac43
+++ b/util/async.c
9bac43
@@ -388,6 +388,9 @@ static void co_schedule_bh_cb(void *opaque)
9bac43
         QSLIST_REMOVE_HEAD(&straight, co_scheduled_next);
9bac43
         trace_aio_co_schedule_bh_cb(ctx, co);
9bac43
         aio_context_acquire(ctx);
9bac43
+
9bac43
+        /* Protected by write barrier in qemu_aio_coroutine_enter */
9bac43
+        atomic_set(&co->scheduled, NULL);
9bac43
         qemu_coroutine_enter(co);
9bac43
         aio_context_release(ctx);
9bac43
     }
9bac43
@@ -438,6 +441,16 @@ fail:
9bac43
 void aio_co_schedule(AioContext *ctx, Coroutine *co)
9bac43
 {
9bac43
     trace_aio_co_schedule(ctx, co);
9bac43
+    const char *scheduled = atomic_cmpxchg(&co->scheduled, NULL,
9bac43
+                                           __func__);
9bac43
+
9bac43
+    if (scheduled) {
9bac43
+        fprintf(stderr,
9bac43
+                "%s: Co-routine was already scheduled in '%s'\n",
9bac43
+                __func__, scheduled);
9bac43
+        abort();
9bac43
+    }
9bac43
+
9bac43
     QSLIST_INSERT_HEAD_ATOMIC(&ctx->scheduled_coroutines,
9bac43
                               co, co_scheduled_next);
9bac43
     qemu_bh_schedule(ctx->co_schedule_bh);
9bac43
diff --git a/util/qemu-coroutine-sleep.c b/util/qemu-coroutine-sleep.c
9bac43
index 9c56550..254349c 100644
9bac43
--- a/util/qemu-coroutine-sleep.c
9bac43
+++ b/util/qemu-coroutine-sleep.c
9bac43
@@ -13,6 +13,7 @@
9bac43
 
9bac43
 #include "qemu/osdep.h"
9bac43
 #include "qemu/coroutine.h"
9bac43
+#include "qemu/coroutine_int.h"
9bac43
 #include "qemu/timer.h"
9bac43
 #include "block/aio.h"
9bac43
 
9bac43
@@ -25,6 +26,8 @@ static void co_sleep_cb(void *opaque)
9bac43
 {
9bac43
     CoSleepCB *sleep_cb = opaque;
9bac43
 
9bac43
+    /* Write of schedule protected by barrier write in aio_co_schedule */
9bac43
+    atomic_set(&sleep_cb->co->scheduled, NULL);
9bac43
     aio_co_wake(sleep_cb->co);
9bac43
 }
9bac43
 
9bac43
@@ -34,6 +37,15 @@ void coroutine_fn co_aio_sleep_ns(AioContext *ctx, QEMUClockType type,
9bac43
     CoSleepCB sleep_cb = {
9bac43
         .co = qemu_coroutine_self(),
9bac43
     };
9bac43
+
9bac43
+    const char *scheduled = atomic_cmpxchg(&sleep_cb.co->scheduled, NULL,
9bac43
+                                           __func__);
9bac43
+    if (scheduled) {
9bac43
+        fprintf(stderr,
9bac43
+                "%s: Co-routine was already scheduled in '%s'\n",
9bac43
+                __func__, scheduled);
9bac43
+        abort();
9bac43
+    }
9bac43
     sleep_cb.ts = aio_timer_new(ctx, type, SCALE_NS, co_sleep_cb, &sleep_cb);
9bac43
     timer_mod(sleep_cb.ts, qemu_clock_get_ns(type) + ns);
9bac43
     qemu_coroutine_yield();
9bac43
diff --git a/util/qemu-coroutine.c b/util/qemu-coroutine.c
9bac43
index d6095c1..9eff7fd 100644
9bac43
--- a/util/qemu-coroutine.c
9bac43
+++ b/util/qemu-coroutine.c
9bac43
@@ -107,8 +107,22 @@ void qemu_aio_coroutine_enter(AioContext *ctx, Coroutine *co)
9bac43
     Coroutine *self = qemu_coroutine_self();
9bac43
     CoroutineAction ret;
9bac43
 
9bac43
+    /* Cannot rely on the read barrier for co in aio_co_wake(), as there are
9bac43
+     * callers outside of aio_co_wake() */
9bac43
+    const char *scheduled = atomic_mb_read(&co->scheduled);
9bac43
+
9bac43
     trace_qemu_aio_coroutine_enter(ctx, self, co, co->entry_arg);
9bac43
 
9bac43
+    /* if the Coroutine has already been scheduled, entering it again will
9bac43
+     * cause us to enter it twice, potentially even after the coroutine has
9bac43
+     * been deleted */
9bac43
+    if (scheduled) {
9bac43
+        fprintf(stderr,
9bac43
+                "%s: Co-routine was already scheduled in '%s'\n",
9bac43
+                __func__, scheduled);
9bac43
+        abort();
9bac43
+    }
9bac43
+
9bac43
     if (co->caller) {
9bac43
         fprintf(stderr, "Co-routine re-entered recursively\n");
9bac43
         abort();
9bac43
-- 
9bac43
1.8.3.1
9bac43