|
|
9ae3a8 |
From 31d1d075a4cb1526d4c0ba43a8a6d581cedf0292 Mon Sep 17 00:00:00 2001
|
|
|
9ae3a8 |
From: Fam Zheng <famz@redhat.com>
|
|
|
9ae3a8 |
Date: Fri, 7 Nov 2014 10:09:29 +0100
|
|
|
9ae3a8 |
Subject: [PATCH 8/9] Revert "linux-aio: use event notifiers"
|
|
|
9ae3a8 |
|
|
|
9ae3a8 |
Message-id: <1415354969-14209-1-git-send-email-famz@redhat.com>
|
|
|
9ae3a8 |
Patchwork-id: 62180
|
|
|
9ae3a8 |
O-Subject: [RHEL-7.1 qemu-kvm PATCH v3] Revert "linux-aio: use event notifiers"
|
|
|
9ae3a8 |
Bugzilla: 1104748
|
|
|
9ae3a8 |
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
|
|
|
9ae3a8 |
RH-Acked-by: Laszlo Ersek <lersek@redhat.com>
|
|
|
9ae3a8 |
RH-Acked-by: Paolo Bonzini <pbonzini@redhat.com>
|
|
|
9ae3a8 |
|
|
|
9ae3a8 |
This reverts commit c90caf25e2b6945ae13560476a5ecd7992e9f945:
|
|
|
9ae3a8 |
|
|
|
9ae3a8 |
linux-aio: use event notifiers
|
|
|
9ae3a8 |
|
|
|
9ae3a8 |
Since linux-aio already uses an eventfd, converting it to use the
|
|
|
9ae3a8 |
EventNotifier-based API simplifies the code even though it is not
|
|
|
9ae3a8 |
meant to be portable.
|
|
|
9ae3a8 |
|
|
|
9ae3a8 |
Reviewed-by: Anthony Liguori <anthony@codemonkey.ws>
|
|
|
9ae3a8 |
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
|
|
9ae3a8 |
|
|
|
9ae3a8 |
Signed-off-by: Fam Zheng <famz@redhat.com>
|
|
|
9ae3a8 |
|
|
|
9ae3a8 |
Justification:
|
|
|
9ae3a8 |
|
|
|
9ae3a8 |
There is a performance regression compared to RHEL 6 since we picked
|
|
|
9ae3a8 |
this patch during RHEL 7 rebase. The bugzilla has more data but as a
|
|
|
9ae3a8 |
quick overview we can see the difference is significant:
|
|
|
9ae3a8 |
|
|
|
9ae3a8 |
Configuration rw bs iodepth bw iops
|
|
|
9ae3a8 |
------------------------------------------------------------------
|
|
|
9ae3a8 |
Before revert randwrite 4k 32 579 148464
|
|
|
9ae3a8 |
After revert randwrite 4k 32 877 224752
|
|
|
9ae3a8 |
|
|
|
9ae3a8 |
The reason is that before revert, we pass min_nr=MAX_EVENTS to
|
|
|
9ae3a8 |
io_getevents, which is much slower (50000+ ns compared to a few
|
|
|
9ae3a8 |
hundreds) than when min_nr is val, the number of events, after
|
|
|
9ae3a8 |
revert.
|
|
|
9ae3a8 |
|
|
|
9ae3a8 |
The decisive difference is that MAX_EVENTS is usually strictly
|
|
|
9ae3a8 |
greater than the number of pending events, hence the kernel code
|
|
|
9ae3a8 |
takes a different path into the hrtimer, despite timeout=0.
|
|
|
9ae3a8 |
|
|
|
9ae3a8 |
In other words, the root cause is in kernel. A fix is posted to
|
|
|
9ae3a8 |
upstream. But let's have this workaround in qemu-kvm anyway.
|
|
|
9ae3a8 |
|
|
|
9ae3a8 |
Upstream:
|
|
|
9ae3a8 |
|
|
|
9ae3a8 |
The issue is silently compensated since cd758dd0aca (aio / timers:
|
|
|
9ae3a8 |
Add prctl(PR_SET_TIMERSLACK, 1, ...) to reduce timer slack), and the
|
|
|
9ae3a8 |
io_getevents call is moved to a BH, so changing min_nr back to 1 in
|
|
|
9ae3a8 |
upstream is not demanding. As the ultimate fix in kernel is on its
|
|
|
9ae3a8 |
way, reverting is a reasonable move for RHEL.
|
|
|
9ae3a8 |
|
|
|
9ae3a8 |
Signed-off-by: Fam Zheng <famz@redhat.com>
|
|
|
9ae3a8 |
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
|
|
|
9ae3a8 |
|
|
|
9ae3a8 |
Conflicts:
|
|
|
9ae3a8 |
block/linux-aio.c
|
|
|
9ae3a8 |
Trivial context conflict in #include section.
|
|
|
9ae3a8 |
---
|
|
|
9ae3a8 |
block/linux-aio.c | 49 ++++++++++++++++++++++++++++++-------------------
|
|
|
9ae3a8 |
1 file changed, 30 insertions(+), 19 deletions(-)
|
|
|
9ae3a8 |
|
|
|
9ae3a8 |
diff --git a/block/linux-aio.c b/block/linux-aio.c
|
|
|
9ae3a8 |
index ee0f8d1..40041d1 100644
|
|
|
9ae3a8 |
--- a/block/linux-aio.c
|
|
|
9ae3a8 |
+++ b/block/linux-aio.c
|
|
|
9ae3a8 |
@@ -11,8 +11,8 @@
|
|
|
9ae3a8 |
#include "block/aio.h"
|
|
|
9ae3a8 |
#include "qemu/queue.h"
|
|
|
9ae3a8 |
#include "block/raw-aio.h"
|
|
|
9ae3a8 |
-#include "qemu/event_notifier.h"
|
|
|
9ae3a8 |
|
|
|
9ae3a8 |
+#include <sys/eventfd.h>
|
|
|
9ae3a8 |
#include <libaio.h>
|
|
|
9ae3a8 |
|
|
|
9ae3a8 |
/*
|
|
|
9ae3a8 |
@@ -38,7 +38,7 @@ struct qemu_laiocb {
|
|
|
9ae3a8 |
|
|
|
9ae3a8 |
struct qemu_laio_state {
|
|
|
9ae3a8 |
io_context_t ctx;
|
|
|
9ae3a8 |
- EventNotifier e;
|
|
|
9ae3a8 |
+ int efd;
|
|
|
9ae3a8 |
int count;
|
|
|
9ae3a8 |
};
|
|
|
9ae3a8 |
|
|
|
9ae3a8 |
@@ -77,17 +77,29 @@ static void qemu_laio_process_completion(struct qemu_laio_state *s,
|
|
|
9ae3a8 |
qemu_aio_release(laiocb);
|
|
|
9ae3a8 |
}
|
|
|
9ae3a8 |
|
|
|
9ae3a8 |
-static void qemu_laio_completion_cb(EventNotifier *e)
|
|
|
9ae3a8 |
+static void qemu_laio_completion_cb(void *opaque)
|
|
|
9ae3a8 |
{
|
|
|
9ae3a8 |
- struct qemu_laio_state *s = container_of(e, struct qemu_laio_state, e);
|
|
|
9ae3a8 |
+ struct qemu_laio_state *s = opaque;
|
|
|
9ae3a8 |
|
|
|
9ae3a8 |
- while (event_notifier_test_and_clear(&s->e)) {
|
|
|
9ae3a8 |
+ while (1) {
|
|
|
9ae3a8 |
struct io_event events[MAX_EVENTS];
|
|
|
9ae3a8 |
+ uint64_t val;
|
|
|
9ae3a8 |
+ ssize_t ret;
|
|
|
9ae3a8 |
struct timespec ts = { 0 };
|
|
|
9ae3a8 |
int nevents, i;
|
|
|
9ae3a8 |
|
|
|
9ae3a8 |
do {
|
|
|
9ae3a8 |
- nevents = io_getevents(s->ctx, MAX_EVENTS, MAX_EVENTS, events, &ts);
|
|
|
9ae3a8 |
+ ret = read(s->efd, &val, sizeof(val));
|
|
|
9ae3a8 |
+ } while (ret == -1 && errno == EINTR);
|
|
|
9ae3a8 |
+
|
|
|
9ae3a8 |
+ if (ret == -1 && errno == EAGAIN)
|
|
|
9ae3a8 |
+ break;
|
|
|
9ae3a8 |
+
|
|
|
9ae3a8 |
+ if (ret != 8)
|
|
|
9ae3a8 |
+ break;
|
|
|
9ae3a8 |
+
|
|
|
9ae3a8 |
+ do {
|
|
|
9ae3a8 |
+ nevents = io_getevents(s->ctx, val, MAX_EVENTS, events, &ts);
|
|
|
9ae3a8 |
} while (nevents == -EINTR);
|
|
|
9ae3a8 |
|
|
|
9ae3a8 |
for (i = 0; i < nevents; i++) {
|
|
|
9ae3a8 |
@@ -101,9 +113,9 @@ static void qemu_laio_completion_cb(EventNotifier *e)
|
|
|
9ae3a8 |
}
|
|
|
9ae3a8 |
}
|
|
|
9ae3a8 |
|
|
|
9ae3a8 |
-static int qemu_laio_flush_cb(EventNotifier *e)
|
|
|
9ae3a8 |
+static int qemu_laio_flush_cb(void *opaque)
|
|
|
9ae3a8 |
{
|
|
|
9ae3a8 |
- struct qemu_laio_state *s = container_of(e, struct qemu_laio_state, e);
|
|
|
9ae3a8 |
+ struct qemu_laio_state *s = opaque;
|
|
|
9ae3a8 |
|
|
|
9ae3a8 |
return (s->count > 0) ? 1 : 0;
|
|
|
9ae3a8 |
}
|
|
|
9ae3a8 |
@@ -135,9 +147,8 @@ static void laio_cancel(BlockDriverAIOCB *blockacb)
|
|
|
9ae3a8 |
* We might be able to do this slightly more optimal by removing the
|
|
|
9ae3a8 |
* O_NONBLOCK flag.
|
|
|
9ae3a8 |
*/
|
|
|
9ae3a8 |
- while (laiocb->ret == -EINPROGRESS) {
|
|
|
9ae3a8 |
- qemu_laio_completion_cb(&laiocb->ctx->e);
|
|
|
9ae3a8 |
- }
|
|
|
9ae3a8 |
+ while (laiocb->ret == -EINPROGRESS)
|
|
|
9ae3a8 |
+ qemu_laio_completion_cb(laiocb->ctx);
|
|
|
9ae3a8 |
}
|
|
|
9ae3a8 |
|
|
|
9ae3a8 |
static const AIOCBInfo laio_aiocb_info = {
|
|
|
9ae3a8 |
@@ -176,7 +187,7 @@ BlockDriverAIOCB *laio_submit(BlockDriverState *bs, void *aio_ctx, int fd,
|
|
|
9ae3a8 |
__func__, type);
|
|
|
9ae3a8 |
goto out_free_aiocb;
|
|
|
9ae3a8 |
}
|
|
|
9ae3a8 |
- io_set_eventfd(&laiocb->iocb, event_notifier_get_fd(&s->e));
|
|
|
9ae3a8 |
+ io_set_eventfd(&laiocb->iocb, s->efd);
|
|
|
9ae3a8 |
s->count++;
|
|
|
9ae3a8 |
|
|
|
9ae3a8 |
if (io_submit(s->ctx, 1, &iocbs) < 0)
|
|
|
9ae3a8 |
@@ -195,21 +206,21 @@ void *laio_init(void)
|
|
|
9ae3a8 |
struct qemu_laio_state *s;
|
|
|
9ae3a8 |
|
|
|
9ae3a8 |
s = g_malloc0(sizeof(*s));
|
|
|
9ae3a8 |
- if (event_notifier_init(&s->e, false) < 0) {
|
|
|
9ae3a8 |
+ s->efd = eventfd(0, 0);
|
|
|
9ae3a8 |
+ if (s->efd == -1)
|
|
|
9ae3a8 |
goto out_free_state;
|
|
|
9ae3a8 |
- }
|
|
|
9ae3a8 |
+ fcntl(s->efd, F_SETFL, O_NONBLOCK);
|
|
|
9ae3a8 |
|
|
|
9ae3a8 |
- if (io_setup(MAX_EVENTS, &s->ctx) != 0) {
|
|
|
9ae3a8 |
+ if (io_setup(MAX_EVENTS, &s->ctx) != 0)
|
|
|
9ae3a8 |
goto out_close_efd;
|
|
|
9ae3a8 |
- }
|
|
|
9ae3a8 |
|
|
|
9ae3a8 |
- qemu_aio_set_event_notifier(&s->e, qemu_laio_completion_cb,
|
|
|
9ae3a8 |
- qemu_laio_flush_cb);
|
|
|
9ae3a8 |
+ qemu_aio_set_fd_handler(s->efd, qemu_laio_completion_cb, NULL,
|
|
|
9ae3a8 |
+ qemu_laio_flush_cb, s);
|
|
|
9ae3a8 |
|
|
|
9ae3a8 |
return s;
|
|
|
9ae3a8 |
|
|
|
9ae3a8 |
out_close_efd:
|
|
|
9ae3a8 |
- event_notifier_cleanup(&s->e);
|
|
|
9ae3a8 |
+ close(s->efd);
|
|
|
9ae3a8 |
out_free_state:
|
|
|
9ae3a8 |
g_free(s);
|
|
|
9ae3a8 |
return NULL;
|
|
|
9ae3a8 |
--
|
|
|
9ae3a8 |
1.8.3.1
|
|
|
9ae3a8 |
|