From 31d1d075a4cb1526d4c0ba43a8a6d581cedf0292 Mon Sep 17 00:00:00 2001
From: Fam Zheng <famz@redhat.com>
Date: Fri, 7 Nov 2014 10:09:29 +0100
Subject: [PATCH 8/9] Revert "linux-aio: use event notifiers"
Message-id: <1415354969-14209-1-git-send-email-famz@redhat.com>
Patchwork-id: 62180
O-Subject: [RHEL-7.1 qemu-kvm PATCH v3] Revert "linux-aio: use event notifiers"
Bugzilla: 1104748
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
RH-Acked-by: Laszlo Ersek <lersek@redhat.com>
RH-Acked-by: Paolo Bonzini <pbonzini@redhat.com>

This reverts commit c90caf25e2b6945ae13560476a5ecd7992e9f945:

    linux-aio: use event notifiers

    Since linux-aio already uses an eventfd, converting it to use the
    EventNotifier-based API simplifies the code even though it is not
    meant to be portable.

    Reviewed-by: Anthony Liguori <anthony@codemonkey.ws>
    Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>

Signed-off-by: Fam Zheng <famz@redhat.com>

Justification:

Since we picked up this patch during the RHEL 7 rebase, there has been
a performance regression compared to RHEL 6.  The bugzilla has more
data, but as a quick overview, the difference is significant:

Configuration   rw         bs   iodepth   bw (MiB/s)     iops
--------------------------------------------------------------
Before revert   randwrite  4k        32          579   148464
After revert    randwrite  4k        32          877   224752
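
For reference, a fio invocation along these lines should reproduce the
configuration above (the target filename, size, and runtime below are
placeholders, not taken from the bugzilla):

  fio --name=bench --ioengine=libaio --direct=1 --rw=randwrite \
      --bs=4k --iodepth=32 --size=1g --runtime=60 --time_based \
      --filename=/path/to/test/device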

The reason is that before the revert, we pass min_nr=MAX_EVENTS to
io_getevents, which is much slower (50000+ ns, compared to a few
hundred ns) than after the revert, where min_nr is val, the number of
completed events read from the eventfd.

The decisive difference is that MAX_EVENTS is usually strictly greater
than the number of pending events, so the kernel takes a different
path into the hrtimer code, even though timeout=0.
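
The effect can be demonstrated with a micro-benchmark along the
following lines (a hypothetical sketch, not taken from the bugzilla;
the temp file name is a placeholder, error handling is mostly omitted,
and the timings depend on the kernel):

  /* gcc -O2 min_nr_bench.c -laio */
  #define _GNU_SOURCE           /* for O_DIRECT */
  #include <fcntl.h>
  #include <libaio.h>
  #include <stdio.h>
  #include <stdlib.h>
  #include <string.h>
  #include <time.h>
  #include <unistd.h>

  #define MAX_EVENTS 128

  /* Submit one 4k write, let it complete, then time a single
   * io_getevents call with the given min_nr and a zero timeout. */
  static long long bench(io_context_t ctx, int fd, char *buf, long min_nr)
  {
      struct iocb iocb, *iocbs[1] = { &iocb };
      struct io_event events[MAX_EVENTS];
      struct timespec ts = { 0, 0 }, t0, t1;

      io_prep_pwrite(&iocb, fd, buf, 4096, 0);
      io_submit(ctx, 1, iocbs);
      usleep(10000);        /* wait until exactly one event is pending */

      clock_gettime(CLOCK_MONOTONIC, &t0);
      io_getevents(ctx, min_nr, MAX_EVENTS, events, &ts);
      clock_gettime(CLOCK_MONOTONIC, &t1);
      return (t1.tv_sec - t0.tv_sec) * 1000000000LL
             + (t1.tv_nsec - t0.tv_nsec);
  }

  int main(void)
  {
      io_context_t ctx = 0;
      char *buf;
      int fd;

      if (posix_memalign((void **)&buf, 4096, 4096) != 0 ||
          io_setup(MAX_EVENTS, &ctx) != 0) {
          return 1;
      }
      memset(buf, 0, 4096);
      fd = open("bench.tmp", O_RDWR | O_CREAT | O_DIRECT, 0600);
      if (fd < 0) {
          perror("open");
          return 1;
      }
      /* min_nr > pending events: slow hrtimer path despite timeout=0 */
      printf("min_nr=MAX_EVENTS: %lld ns\n", bench(ctx, fd, buf, MAX_EVENTS));
      /* min_nr satisfiable immediately: fast path */
      printf("min_nr=1:          %lld ns\n", bench(ctx, fd, buf, 1));
      io_destroy(ctx);
      close(fd);
      unlink("bench.tmp");
      free(buf);
      return 0;
  }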

In other words, the root cause is in the kernel, and a fix has been
posted upstream.  But let's have this workaround in qemu-kvm anyway.

Upstream:

The issue has been silently compensated for since commit cd758dd0aca
(aio / timers: Add prctl(PR_SET_TIMERSLACK, 1, ...) to reduce timer
slack), and the io_getevents call has been moved into a BH, so changing
min_nr back to 1 upstream is not pressing.  As the ultimate kernel fix
is on its way, reverting is a reasonable move for RHEL.

Signed-off-by: Fam Zheng <famz@redhat.com>
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>

Conflicts:
	block/linux-aio.c

Trivial context conflict in the #include section.
---
block/linux-aio.c | 49 ++++++++++++++++++++++++++++++-------------------
1 file changed, 30 insertions(+), 19 deletions(-)

diff --git a/block/linux-aio.c b/block/linux-aio.c
index ee0f8d1..40041d1 100644
--- a/block/linux-aio.c
+++ b/block/linux-aio.c
@@ -11,8 +11,8 @@
 #include "block/aio.h"
 #include "qemu/queue.h"
 #include "block/raw-aio.h"
-#include "qemu/event_notifier.h"
 
+#include <sys/eventfd.h>
 #include <libaio.h>
 
 /*
@@ -38,7 +38,7 @@ struct qemu_laiocb {
 
 struct qemu_laio_state {
     io_context_t ctx;
-    EventNotifier e;
+    int efd;
     int count;
 };
 
@@ -77,17 +77,29 @@ static void qemu_laio_process_completion(struct qemu_laio_state *s,
     qemu_aio_release(laiocb);
 }
 
-static void qemu_laio_completion_cb(EventNotifier *e)
+static void qemu_laio_completion_cb(void *opaque)
 {
-    struct qemu_laio_state *s = container_of(e, struct qemu_laio_state, e);
+    struct qemu_laio_state *s = opaque;
 
-    while (event_notifier_test_and_clear(&s->e)) {
+    while (1) {
         struct io_event events[MAX_EVENTS];
+        uint64_t val;
+        ssize_t ret;
         struct timespec ts = { 0 };
         int nevents, i;
 
         do {
-            nevents = io_getevents(s->ctx, MAX_EVENTS, MAX_EVENTS, events, &ts);
+            ret = read(s->efd, &val, sizeof(val));
+        } while (ret == -1 && errno == EINTR);
+
+        if (ret == -1 && errno == EAGAIN)
+            break;
+
+        if (ret != 8)
+            break;
+
+        do {
+            nevents = io_getevents(s->ctx, val, MAX_EVENTS, events, &ts);
         } while (nevents == -EINTR);
 
         for (i = 0; i < nevents; i++) {
@@ -101,9 +113,9 @@ static void qemu_laio_completion_cb(EventNotifier *e)
     }
 }
 
-static int qemu_laio_flush_cb(EventNotifier *e)
+static int qemu_laio_flush_cb(void *opaque)
 {
-    struct qemu_laio_state *s = container_of(e, struct qemu_laio_state, e);
+    struct qemu_laio_state *s = opaque;
 
     return (s->count > 0) ? 1 : 0;
 }
@@ -135,9 +147,8 @@ static void laio_cancel(BlockDriverAIOCB *blockacb)
      * We might be able to do this slightly more optimal by removing the
      * O_NONBLOCK flag.
      */
-    while (laiocb->ret == -EINPROGRESS) {
-        qemu_laio_completion_cb(&laiocb->ctx->e);
-    }
+    while (laiocb->ret == -EINPROGRESS)
+        qemu_laio_completion_cb(laiocb->ctx);
 }
 
 static const AIOCBInfo laio_aiocb_info = {
@@ -176,7 +187,7 @@ BlockDriverAIOCB *laio_submit(BlockDriverState *bs, void *aio_ctx, int fd,
                 __func__, type);
         goto out_free_aiocb;
     }
-    io_set_eventfd(&laiocb->iocb, event_notifier_get_fd(&s->e));
+    io_set_eventfd(&laiocb->iocb, s->efd);
     s->count++;
 
     if (io_submit(s->ctx, 1, &iocbs) < 0)
@@ -195,21 +206,21 @@ void *laio_init(void)
     struct qemu_laio_state *s;
 
     s = g_malloc0(sizeof(*s));
-    if (event_notifier_init(&s->e, false) < 0) {
+    s->efd = eventfd(0, 0);
+    if (s->efd == -1)
         goto out_free_state;
-    }
+    fcntl(s->efd, F_SETFL, O_NONBLOCK);
 
-    if (io_setup(MAX_EVENTS, &s->ctx) != 0) {
+    if (io_setup(MAX_EVENTS, &s->ctx) != 0)
         goto out_close_efd;
-    }
 
-    qemu_aio_set_event_notifier(&s->e, qemu_laio_completion_cb,
-                                qemu_laio_flush_cb);
+    qemu_aio_set_fd_handler(s->efd, qemu_laio_completion_cb, NULL,
+                            qemu_laio_flush_cb, s);
 
     return s;
 
 out_close_efd:
-    event_notifier_cleanup(&s->e);
+    close(s->efd);
 out_free_state:
     g_free(s);
     return NULL;
--
1.8.3.1