9ae3a8
From 8603c409443521f98a73652ebf30233da879c7de Mon Sep 17 00:00:00 2001
9ae3a8
From: Kevin Wolf <kwolf@redhat.com>
9ae3a8
Date: Thu, 28 Nov 2013 10:23:32 +0100
9ae3a8
Subject: [PATCH 08/37] block: Don't use guest sector size for qemu_blockalign()
9ae3a8
9ae3a8
Message-id: <1392117622-28812-9-git-send-email-kwolf@redhat.com>
9ae3a8
Patchwork-id: 57173
9ae3a8
O-Subject: [RHEL-7.0 qemu-kvm PATCH v2 08/37] block: Don't use guest sector size for qemu_blockalign()
9ae3a8
Bugzilla: 748906
9ae3a8
RH-Acked-by: Laszlo Ersek <lersek@redhat.com>
9ae3a8
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
9ae3a8
RH-Acked-by: Max Reitz <mreitz@redhat.com>
9ae3a8
9ae3a8
bs->buffer_alignment is set by the device emulation and contains the
9ae3a8
logical block size of the guest device. This isn't something that the
9ae3a8
block layer should know, and even less something to use for determining
9ae3a8
the right alignment of buffers to be used for the host.
9ae3a8
9ae3a8
The new BlockLimits field opt_mem_alignment tells the qemu block layer
9ae3a8
the optimal alignment to be used so that no bounce buffer must be used
9ae3a8
in the driver.
9ae3a8
9ae3a8
This patch may change the buffer alignment from 4k to 512 for all
9ae3a8
callers that used qemu_blockalign() with the top-level image format
9ae3a8
BlockDriverState. The value was never propagated to other levels in the
9ae3a8
tree, so in particular raw-posix never required anything other than 512.
9ae3a8
9ae3a8
While on disks with 4k sectors direct I/O requires a 4k alignment,
9ae3a8
memory may still be okay when aligned to 512 byte boundaries. This is
9ae3a8
what must have happened in practice, because otherwise this would
9ae3a8
already have failed earlier. Therefore I don't expect regressions even
9ae3a8
with this intermediate state. Later, raw-posix can implement the hook
9ae3a8
and expose a different memory alignment requirement.
9ae3a8
9ae3a8
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
9ae3a8
Reviewed-by: Wenchao Xia <xiawenc@linux.vnet.ibm.com>
9ae3a8
Reviewed-by: Max Reitz <mreitz@redhat.com>
9ae3a8
(cherry picked from commit 339064d5063924e5176842abbf6c8089f3479c5b)
9ae3a8
9ae3a8
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
9ae3a8
---
9ae3a8
 block.c                   | 23 ++++++++++++++++++++---
9ae3a8
 include/block/block.h     |  3 +++
9ae3a8
 include/block/block_int.h |  3 +++
9ae3a8
 3 files changed, 26 insertions(+), 3 deletions(-)
9ae3a8
---
9ae3a8
 block.c                   |   23 ++++++++++++++++++++---
9ae3a8
 include/block/block.h     |    3 +++
9ae3a8
 include/block/block_int.h |    3 +++
9ae3a8
 3 files changed, 26 insertions(+), 3 deletions(-)
9ae3a8
9ae3a8
diff --git a/block.c b/block.c
9ae3a8
index 6c98ff7..c85f43e 100644
9ae3a8
--- a/block.c
9ae3a8
+++ b/block.c
9ae3a8
@@ -185,6 +185,16 @@ static void bdrv_io_limits_intercept(BlockDriverState *bs,
9ae3a8
     qemu_co_queue_next(&bs->throttled_reqs);
9ae3a8
 }
9ae3a8
 
9ae3a8
+size_t bdrv_opt_mem_align(BlockDriverState *bs)
9ae3a8
+{
9ae3a8
+    if (!bs || !bs->drv) {
9ae3a8
+        /* 4k should be on the safe side */
9ae3a8
+        return 4096;
9ae3a8
+    }
9ae3a8
+
9ae3a8
+    return bs->bl.opt_mem_alignment;
9ae3a8
+}
9ae3a8
+
9ae3a8
 /* check if the path starts with "<protocol>:" */
9ae3a8
 static int path_has_protocol(const char *path)
9ae3a8
 {
9ae3a8
@@ -460,6 +470,9 @@ int bdrv_refresh_limits(BlockDriverState *bs)
9ae3a8
     if (bs->file) {
9ae3a8
         bdrv_refresh_limits(bs->file);
9ae3a8
         bs->bl.opt_transfer_length = bs->file->bl.opt_transfer_length;
9ae3a8
+        bs->bl.opt_mem_alignment = bs->file->bl.opt_mem_alignment;
9ae3a8
+    } else {
9ae3a8
+        bs->bl.opt_mem_alignment = 512;
9ae3a8
     }
9ae3a8
 
9ae3a8
     if (bs->backing_hd) {
9ae3a8
@@ -467,6 +480,9 @@ int bdrv_refresh_limits(BlockDriverState *bs)
9ae3a8
         bs->bl.opt_transfer_length =
9ae3a8
             MAX(bs->bl.opt_transfer_length,
9ae3a8
                 bs->backing_hd->bl.opt_transfer_length);
9ae3a8
+        bs->bl.opt_mem_alignment =
9ae3a8
+            MAX(bs->bl.opt_mem_alignment,
9ae3a8
+                bs->backing_hd->bl.opt_mem_alignment);
9ae3a8
     }
9ae3a8
 
9ae3a8
     /* Then let the driver override it */
9ae3a8
@@ -4729,7 +4745,7 @@ void bdrv_set_buffer_alignment(BlockDriverState *bs, int align)
9ae3a8
 
9ae3a8
 void *qemu_blockalign(BlockDriverState *bs, size_t size)
9ae3a8
 {
9ae3a8
-    return qemu_memalign((bs && bs->buffer_alignment) ? bs->buffer_alignment : 512, size);
9ae3a8
+    return qemu_memalign(bdrv_opt_mem_align(bs), size);
9ae3a8
 }
9ae3a8
 
9ae3a8
 /*
9ae3a8
@@ -4738,12 +4754,13 @@ void *qemu_blockalign(BlockDriverState *bs, size_t size)
9ae3a8
 bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov)
9ae3a8
 {
9ae3a8
     int i;
9ae3a8
+    size_t alignment = bdrv_opt_mem_align(bs);
9ae3a8
 
9ae3a8
     for (i = 0; i < qiov->niov; i++) {
9ae3a8
-        if ((uintptr_t) qiov->iov[i].iov_base % bs->buffer_alignment) {
9ae3a8
+        if ((uintptr_t) qiov->iov[i].iov_base % alignment) {
9ae3a8
             return false;
9ae3a8
         }
9ae3a8
-        if (qiov->iov[i].iov_len % bs->buffer_alignment) {
9ae3a8
+        if (qiov->iov[i].iov_len % alignment) {
9ae3a8
             return false;
9ae3a8
         }
9ae3a8
     }
9ae3a8
diff --git a/include/block/block.h b/include/block/block.h
9ae3a8
index 3e22fc2..9d1ce59 100644
9ae3a8
--- a/include/block/block.h
9ae3a8
+++ b/include/block/block.h
9ae3a8
@@ -406,6 +406,9 @@ void bdrv_img_create(const char *filename, const char *fmt,
9ae3a8
                      char *options, uint64_t img_size, int flags,
9ae3a8
                      Error **errp, bool quiet);
9ae3a8
 
9ae3a8
+/* Returns the memory alignment in bytes needed so that no bounce buffer
9ae3a8
+ * is required throughout the stack */
9ae3a8
+size_t bdrv_opt_mem_align(BlockDriverState *bs);
9ae3a8
 void bdrv_set_buffer_alignment(BlockDriverState *bs, int align);
9ae3a8
 void *qemu_blockalign(BlockDriverState *bs, size_t size);
9ae3a8
 bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov);
9ae3a8
diff --git a/include/block/block_int.h b/include/block/block_int.h
9ae3a8
index 13a52e8..ddbd53d 100644
9ae3a8
--- a/include/block/block_int.h
9ae3a8
+++ b/include/block/block_int.h
9ae3a8
@@ -239,6 +239,9 @@ typedef struct BlockLimits {
9ae3a8
 
9ae3a8
     /* optimal transfer length in sectors */
9ae3a8
     int opt_transfer_length;
9ae3a8
+
9ae3a8
+    /* memory alignment so that no bounce buffer is needed */
9ae3a8
+    size_t opt_mem_alignment;
9ae3a8
 } BlockLimits;
9ae3a8
 
9ae3a8
 /*
9ae3a8
-- 
9ae3a8
1.7.1
9ae3a8