0a122b
From ddaa3bc746bdf8974337f176e3841b7c27d7253e Mon Sep 17 00:00:00 2001
0a122b
From: Kevin Wolf <kwolf@redhat.com>
0a122b
Date: Tue, 3 Dec 2013 16:34:41 +0100
0a122b
Subject: [PATCH 24/37] block: Align requests in bdrv_co_do_pwritev()
0a122b
0a122b
Message-id: <1392117622-28812-25-git-send-email-kwolf@redhat.com>
0a122b
Patchwork-id: 57189
0a122b
O-Subject: [RHEL-7.0 qemu-kvm PATCH v2 24/37] block: Align requests in bdrv_co_do_pwritev()
0a122b
Bugzilla: 748906
0a122b
RH-Acked-by: Laszlo Ersek <lersek@redhat.com>
0a122b
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
0a122b
RH-Acked-by: Max Reitz <mreitz@redhat.com>
0a122b
0a122b
This patch changes bdrv_co_do_pwritev() to actually be what its name
0a122b
promises. If requests aren't properly aligned, it performs an RMW.
0a122b
0a122b
Requests touching the same block are serialised against the RMW request.
0a122b
Further optimisation of this is possible by differentiating types of
0a122b
requests (concurrent reads should actually be okay here).
0a122b
0a122b
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
0a122b
Reviewed-by: Max Reitz <mreitz@redhat.com>
0a122b
Reviewed-by: Benoit Canet <benoit@irqsave.net>
0a122b
(cherry picked from commit 3b8242e0ea2a2c201ef3d1bd24080490dae33080)
0a122b
0a122b
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
0a122b
---
0a122b
 block.c | 86 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
0a122b
 1 file changed, 85 insertions(+), 1 deletion(-)
0a122b
---
0a122b
 block.c |   86 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
0a122b
 1 files changed, 85 insertions(+), 1 deletions(-)
0a122b
0a122b
diff --git a/block.c b/block.c
0a122b
index fd37037..3ec3949 100644
0a122b
--- a/block.c
0a122b
+++ b/block.c
0a122b
@@ -3078,6 +3078,12 @@ static int coroutine_fn bdrv_co_do_pwritev(BlockDriverState *bs,
0a122b
     BdrvRequestFlags flags)
0a122b
 {
0a122b
     BdrvTrackedRequest req;
0a122b
+    /* TODO Lift BDRV_SECTOR_SIZE restriction in BlockDriver interface */
0a122b
+    uint64_t align = MAX(BDRV_SECTOR_SIZE, bs->request_alignment);
0a122b
+    uint8_t *head_buf = NULL;
0a122b
+    uint8_t *tail_buf = NULL;
0a122b
+    QEMUIOVector local_qiov;
0a122b
+    bool use_local_qiov = false;
0a122b
     int ret;
0a122b
 
0a122b
     if (!bs->drv) {
0a122b
@@ -3096,10 +3102,88 @@ static int coroutine_fn bdrv_co_do_pwritev(BlockDriverState *bs,
0a122b
         bdrv_io_limits_intercept(bs, true, bytes >> BDRV_SECTOR_BITS);
0a122b
     }
0a122b
 
0a122b
+    /*
0a122b
+     * Align write if necessary by performing a read-modify-write cycle.
0a122b
+     * Pad qiov with the read parts and be sure to have a tracked request not
0a122b
+     * only for bdrv_aligned_pwritev, but also for the reads of the RMW cycle.
0a122b
+     */
0a122b
     tracked_request_begin(&req, bs, offset, bytes, true);
0a122b
-    ret = bdrv_aligned_pwritev(bs, &req, offset, bytes, qiov, flags);
0a122b
+
0a122b
+    if (offset & (align - 1)) {
0a122b
+        QEMUIOVector head_qiov;
0a122b
+        struct iovec head_iov;
0a122b
+
0a122b
+        mark_request_serialising(&req, align);
0a122b
+        wait_serialising_requests(&req);
0a122b
+
0a122b
+        head_buf = qemu_blockalign(bs, align);
0a122b
+        head_iov = (struct iovec) {
0a122b
+            .iov_base   = head_buf,
0a122b
+            .iov_len    = align,
0a122b
+        };
0a122b
+        qemu_iovec_init_external(&head_qiov, &head_iov, 1);
0a122b
+
0a122b
+        ret = bdrv_aligned_preadv(bs, &req, offset & ~(align - 1), align,
0a122b
+                                  align, &head_qiov, 0);
0a122b
+        if (ret < 0) {
0a122b
+            goto fail;
0a122b
+        }
0a122b
+
0a122b
+        qemu_iovec_init(&local_qiov, qiov->niov + 2);
0a122b
+        qemu_iovec_add(&local_qiov, head_buf, offset & (align - 1));
0a122b
+        qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
0a122b
+        use_local_qiov = true;
0a122b
+
0a122b
+        bytes += offset & (align - 1);
0a122b
+        offset = offset & ~(align - 1);
0a122b
+    }
0a122b
+
0a122b
+    if ((offset + bytes) & (align - 1)) {
0a122b
+        QEMUIOVector tail_qiov;
0a122b
+        struct iovec tail_iov;
0a122b
+        size_t tail_bytes;
0a122b
+
0a122b
+        mark_request_serialising(&req, align);
0a122b
+        wait_serialising_requests(&req);
0a122b
+
0a122b
+        tail_buf = qemu_blockalign(bs, align);
0a122b
+        tail_iov = (struct iovec) {
0a122b
+            .iov_base   = tail_buf,
0a122b
+            .iov_len    = align,
0a122b
+        };
0a122b
+        qemu_iovec_init_external(&tail_qiov, &tail_iov, 1);
0a122b
+
0a122b
+        ret = bdrv_aligned_preadv(bs, &req, (offset + bytes) & ~(align - 1), align,
0a122b
+                                  align, &tail_qiov, 0);
0a122b
+        if (ret < 0) {
0a122b
+            goto fail;
0a122b
+        }
0a122b
+
0a122b
+        if (!use_local_qiov) {
0a122b
+            qemu_iovec_init(&local_qiov, qiov->niov + 1);
0a122b
+            qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
0a122b
+            use_local_qiov = true;
0a122b
+        }
0a122b
+
0a122b
+        tail_bytes = (offset + bytes) & (align - 1);
0a122b
+        qemu_iovec_add(&local_qiov, tail_buf + tail_bytes, align - tail_bytes);
0a122b
+
0a122b
+        bytes = ROUND_UP(bytes, align);
0a122b
+    }
0a122b
+
0a122b
+    ret = bdrv_aligned_pwritev(bs, &req, offset, bytes,
0a122b
+                               use_local_qiov ? &local_qiov : qiov,
0a122b
+                               flags);
0a122b
+
0a122b
+fail:
0a122b
     tracked_request_end(&req);
0a122b
 
0a122b
+    if (use_local_qiov) {
0a122b
+        qemu_iovec_destroy(&local_qiov);
0a122b
+        qemu_vfree(head_buf);
0a122b
+        qemu_vfree(tail_buf);
0a122b
+    }
0a122b
+
0a122b
     return ret;
0a122b
 }
0a122b
 
0a122b
-- 
0a122b
1.7.1
0a122b