958e1b
From 31d1d075a4cb1526d4c0ba43a8a6d581cedf0292 Mon Sep 17 00:00:00 2001
958e1b
From: Fam Zheng <famz@redhat.com>
958e1b
Date: Fri, 7 Nov 2014 10:09:29 +0100
958e1b
Subject: [PATCH 8/9] Revert "linux-aio: use event notifiers"
958e1b
958e1b
Message-id: <1415354969-14209-1-git-send-email-famz@redhat.com>
958e1b
Patchwork-id: 62180
958e1b
O-Subject: [RHEL-7.1 qemu-kvm PATCH v3] Revert "linux-aio: use event notifiers"
958e1b
Bugzilla: 1104748
958e1b
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
958e1b
RH-Acked-by: Laszlo Ersek <lersek@redhat.com>
958e1b
RH-Acked-by: Paolo Bonzini <pbonzini@redhat.com>
958e1b
958e1b
This reverts commit c90caf25e2b6945ae13560476a5ecd7992e9f945:
958e1b
958e1b
    linux-aio: use event notifiers
958e1b
958e1b
    Since linux-aio already uses an eventfd, converting it to use the
958e1b
    EventNotifier-based API simplifies the code even though it is not
958e1b
    meant to be portable.
958e1b
958e1b
    Reviewed-by: Anthony Liguori <anthony@codemonkey.ws>
958e1b
    Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
958e1b
958e1b
Signed-off-by: Fam Zheng <famz@redhat.com>
958e1b
958e1b
Justification:
958e1b
958e1b
    There is a performance regression compared to RHEL 6 since we picked
958e1b
    this patch during RHEL 7 rebase. The bugzilla has more data but as a
958e1b
    quick overview we can see the difference is significant:
958e1b
958e1b
    Configuration   rw         bs         iodepth    bw         iops
958e1b
    ------------------------------------------------------------------
958e1b
    Before revert   randwrite  4k         32         579        148464
958e1b
    After revert    randwrite  4k         32         877        224752
958e1b
958e1b
    The reason is that before the revert, we pass min_nr=MAX_EVENTS to
958e1b
    io_getevents, which is much slower (50000+ ns compared to a few
958e1b
    hundred) than when min_nr is val, the number of events, after the
958e1b
    revert.
958e1b
958e1b
    The decisive difference is that MAX_EVENTS is usually strictly
958e1b
    greater than the number of pending events, hence the kernel code
958e1b
    takes a different path into the hrtimer, despite timeout=0.
958e1b
958e1b
    In other words, the root cause is in the kernel. A fix has been posted
958e1b
    upstream, but let's have this workaround in qemu-kvm anyway.
958e1b
958e1b
Upstream:
958e1b
958e1b
    The issue has been silently compensated for since cd758dd0aca (aio /
958e1b
    timers: Add prctl(PR_SET_TIMERSLACK, 1, ...) to reduce timer slack), and
958e1b
    the io_getevents call has been moved to a BH, so changing min_nr back to 1 in
958e1b
    upstream is not pressing. As the ultimate fix in the kernel is on its
958e1b
    way, reverting is a reasonable move for RHEL.
958e1b
958e1b
Signed-off-by: Fam Zheng <famz@redhat.com>
958e1b
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
958e1b
958e1b
Conflicts:
958e1b
	block/linux-aio.c
958e1b
Trivial context conflict in #include section.
958e1b
---
958e1b
 block/linux-aio.c | 49 ++++++++++++++++++++++++++++++-------------------
958e1b
 1 file changed, 30 insertions(+), 19 deletions(-)
958e1b
958e1b
diff --git a/block/linux-aio.c b/block/linux-aio.c
958e1b
index ee0f8d1..40041d1 100644
958e1b
--- a/block/linux-aio.c
958e1b
+++ b/block/linux-aio.c
958e1b
@@ -11,8 +11,8 @@
958e1b
 #include "block/aio.h"
958e1b
 #include "qemu/queue.h"
958e1b
 #include "block/raw-aio.h"
958e1b
-#include "qemu/event_notifier.h"
958e1b
 
958e1b
+#include <sys/eventfd.h>
958e1b
 #include <libaio.h>
958e1b
 
958e1b
 /*
958e1b
@@ -38,7 +38,7 @@ struct qemu_laiocb {
958e1b
 
958e1b
 struct qemu_laio_state {
958e1b
     io_context_t ctx;
958e1b
-    EventNotifier e;
958e1b
+    int efd;
958e1b
     int count;
958e1b
 };
958e1b
 
958e1b
@@ -77,17 +77,29 @@ static void qemu_laio_process_completion(struct qemu_laio_state *s,
958e1b
     qemu_aio_release(laiocb);
958e1b
 }
958e1b
 
958e1b
-static void qemu_laio_completion_cb(EventNotifier *e)
958e1b
+static void qemu_laio_completion_cb(void *opaque)
958e1b
 {
958e1b
-    struct qemu_laio_state *s = container_of(e, struct qemu_laio_state, e);
958e1b
+    struct qemu_laio_state *s = opaque;
958e1b
 
958e1b
-    while (event_notifier_test_and_clear(&s->e)) {
958e1b
+    while (1) {
958e1b
         struct io_event events[MAX_EVENTS];
958e1b
+        uint64_t val;
958e1b
+        ssize_t ret;
958e1b
         struct timespec ts = { 0 };
958e1b
         int nevents, i;
958e1b
 
958e1b
         do {
958e1b
-            nevents = io_getevents(s->ctx, MAX_EVENTS, MAX_EVENTS, events, &ts);
958e1b
+            ret = read(s->efd, &val, sizeof(val));
958e1b
+        } while (ret == -1 && errno == EINTR);
958e1b
+
958e1b
+        if (ret == -1 && errno == EAGAIN)
958e1b
+            break;
958e1b
+
958e1b
+        if (ret != 8)
958e1b
+            break;
958e1b
+
958e1b
+        do {
958e1b
+            nevents = io_getevents(s->ctx, val, MAX_EVENTS, events, &ts);
958e1b
         } while (nevents == -EINTR);
958e1b
 
958e1b
         for (i = 0; i < nevents; i++) {
958e1b
@@ -101,9 +113,9 @@ static void qemu_laio_completion_cb(EventNotifier *e)
958e1b
     }
958e1b
 }
958e1b
 
958e1b
-static int qemu_laio_flush_cb(EventNotifier *e)
958e1b
+static int qemu_laio_flush_cb(void *opaque)
958e1b
 {
958e1b
-    struct qemu_laio_state *s = container_of(e, struct qemu_laio_state, e);
958e1b
+    struct qemu_laio_state *s = opaque;
958e1b
 
958e1b
     return (s->count > 0) ? 1 : 0;
958e1b
 }
958e1b
@@ -135,9 +147,8 @@ static void laio_cancel(BlockDriverAIOCB *blockacb)
958e1b
      * We might be able to do this slightly more optimal by removing the
958e1b
      * O_NONBLOCK flag.
958e1b
      */
958e1b
-    while (laiocb->ret == -EINPROGRESS) {
958e1b
-        qemu_laio_completion_cb(&laiocb->ctx->e);
958e1b
-    }
958e1b
+    while (laiocb->ret == -EINPROGRESS)
958e1b
+        qemu_laio_completion_cb(laiocb->ctx);
958e1b
 }
958e1b
 
958e1b
 static const AIOCBInfo laio_aiocb_info = {
958e1b
@@ -176,7 +187,7 @@ BlockDriverAIOCB *laio_submit(BlockDriverState *bs, void *aio_ctx, int fd,
958e1b
                         __func__, type);
958e1b
         goto out_free_aiocb;
958e1b
     }
958e1b
-    io_set_eventfd(&laiocb->iocb, event_notifier_get_fd(&s->e));
958e1b
+    io_set_eventfd(&laiocb->iocb, s->efd);
958e1b
     s->count++;
958e1b
 
958e1b
     if (io_submit(s->ctx, 1, &iocbs) < 0)
958e1b
@@ -195,21 +206,21 @@ void *laio_init(void)
958e1b
     struct qemu_laio_state *s;
958e1b
 
958e1b
     s = g_malloc0(sizeof(*s));
958e1b
-    if (event_notifier_init(&s->e, false) < 0) {
958e1b
+    s->efd = eventfd(0, 0);
958e1b
+    if (s->efd == -1)
958e1b
         goto out_free_state;
958e1b
-    }
958e1b
+    fcntl(s->efd, F_SETFL, O_NONBLOCK);
958e1b
 
958e1b
-    if (io_setup(MAX_EVENTS, &s->ctx) != 0) {
958e1b
+    if (io_setup(MAX_EVENTS, &s->ctx) != 0)
958e1b
         goto out_close_efd;
958e1b
-    }
958e1b
 
958e1b
-    qemu_aio_set_event_notifier(&s->e, qemu_laio_completion_cb,
958e1b
-                                qemu_laio_flush_cb);
958e1b
+    qemu_aio_set_fd_handler(s->efd, qemu_laio_completion_cb, NULL,
958e1b
+        qemu_laio_flush_cb, s);
958e1b
 
958e1b
     return s;
958e1b
 
958e1b
 out_close_efd:
958e1b
-    event_notifier_cleanup(&s->e);
958e1b
+    close(s->efd);
958e1b
 out_free_state:
958e1b
     g_free(s);
958e1b
     return NULL;
958e1b
-- 
958e1b
1.8.3.1
958e1b