|
|
b38b0f |
From 5935958fc4eb9934b1493486a69f0f571e7da112 Mon Sep 17 00:00:00 2001
|
|
|
b38b0f |
From: Thomas Huth <thuth@redhat.com>
|
|
|
b38b0f |
Date: Fri, 30 Aug 2019 12:56:24 +0100
|
|
|
b38b0f |
Subject: [PATCH 06/10] file-posix: Handle undetectable alignment
|
|
|
b38b0f |
|
|
|
b38b0f |
RH-Author: Thomas Huth <thuth@redhat.com>
|
|
|
b38b0f |
Message-id: <20190830125628.23668-2-thuth@redhat.com>
|
|
|
b38b0f |
Patchwork-id: 90209
|
|
|
b38b0f |
O-Subject: [RHEL-8.1.0 qemu-kvm PATCH v2 1/5] file-posix: Handle undetectable alignment
|
|
|
b38b0f |
Bugzilla: 1738839
|
|
|
b38b0f |
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
|
|
|
b38b0f |
RH-Acked-by: Max Reitz <mreitz@redhat.com>
|
|
|
b38b0f |
RH-Acked-by: David Hildenbrand <david@redhat.com>
|
|
|
b38b0f |
|
|
|
b38b0f |
In some cases buf_align or request_alignment cannot be detected:
|
|
|
b38b0f |
|
|
|
b38b0f |
1. With Gluster, buf_align cannot be detected since the actual I/O is
|
|
|
b38b0f |
done on Gluster server, and qemu buffer alignment does not matter.
|
|
|
b38b0f |
Since we don't have alignment requirement, buf_align=1 is the best
|
|
|
b38b0f |
value.
|
|
|
b38b0f |
|
|
|
b38b0f |
2. With local XFS filesystem, buf_align cannot be detected if reading
|
|
|
b38b0f |
from unallocated area. In this we must align the buffer, but we don't
|
|
|
b38b0f |
know what is the correct size. Using the wrong alignment results in
|
|
|
b38b0f |
I/O error.
|
|
|
b38b0f |
|
|
|
b38b0f |
3. With Gluster backed by XFS, request_alignment cannot be detected if
|
|
|
b38b0f |
reading from unallocated area. In this case we need to use the
|
|
|
b38b0f |
correct alignment, and failing to do so results in I/O errors.
|
|
|
b38b0f |
|
|
|
b38b0f |
4. With NFS, the server does not use direct I/O, so both buf_align cannot
|
|
|
b38b0f |
be detected. In this case we don't need any alignment so we can use
|
|
|
b38b0f |
buf_align=1 and request_alignment=1.
|
|
|
b38b0f |
|
|
|
b38b0f |
These cases seems to work when storage sector size is 512 bytes, because
|
|
|
b38b0f |
the current code starts checking align=512. If the check succeeds
|
|
|
b38b0f |
because alignment cannot be detected we use 512. But this does not work
|
|
|
b38b0f |
for storage with 4k sector size.
|
|
|
b38b0f |
|
|
|
b38b0f |
To determine if we can detect the alignment, we probe first with
|
|
|
b38b0f |
align=1. If probing succeeds, maybe there are no alignment requirement
|
|
|
b38b0f |
(cases 1, 4) or we are probing unallocated area (cases 2, 3). Since we
|
|
|
b38b0f |
don't have any way to tell, we treat this as undetectable alignment. If
|
|
|
b38b0f |
probing with align=1 fails with EINVAL, but probing with one of the
|
|
|
b38b0f |
expected alignments succeeds, we know that we found a working alignment.
|
|
|
b38b0f |
|
|
|
b38b0f |
Practically the alignment requirements are the same for buffer
|
|
|
b38b0f |
alignment, buffer length, and offset in file. So in case we cannot
|
|
|
b38b0f |
detect buf_align, we can use request alignment. If we cannot detect
|
|
|
b38b0f |
request alignment, we can fallback to a safe value. To use this logic,
|
|
|
b38b0f |
we probe first request alignment instead of buf_align.
|
|
|
b38b0f |
|
|
|
b38b0f |
Here is a table showing the behaviour with current code (the value in
|
|
|
b38b0f |
parenthesis is the optimal value).
|
|
|
b38b0f |
|
|
|
b38b0f |
Case Sector buf_align (opt) request_alignment (opt) result
|
|
|
b38b0f |
|
|
|
b38b0f |
Signed-off-by: Danilo C. L. de Paula <ddepaula@redhat.com>
|
|
|
b38b0f |
---
|
|
|
b38b0f |
block/file-posix.c | 36 +++++++++++++++++++++++++-----------
|
|
|
b38b0f |
1 file changed, 25 insertions(+), 11 deletions(-)
|
|
|
b38b0f |
|
|
|
b38b0f |
diff --git a/block/file-posix.c b/block/file-posix.c
|
|
|
b38b0f |
index 4b404e4..84c5a31 100644
|
|
|
b38b0f |
--- a/block/file-posix.c
|
|
|
b38b0f |
+++ b/block/file-posix.c
|
|
|
b38b0f |
@@ -324,6 +324,7 @@ static void raw_probe_alignment(BlockDriverState *bs, int fd, Error **errp)
|
|
|
b38b0f |
BDRVRawState *s = bs->opaque;
|
|
|
b38b0f |
char *buf;
|
|
|
b38b0f |
size_t max_align = MAX(MAX_BLOCKSIZE, getpagesize());
|
|
|
b38b0f |
+ size_t alignments[] = {1, 512, 1024, 2048, 4096};
|
|
|
b38b0f |
|
|
|
b38b0f |
/* For SCSI generic devices the alignment is not really used.
|
|
|
b38b0f |
With buffered I/O, we don't have any restrictions. */
|
|
|
b38b0f |
@@ -350,25 +351,38 @@ static void raw_probe_alignment(BlockDriverState *bs, int fd, Error **errp)
|
|
|
b38b0f |
}
|
|
|
b38b0f |
#endif
|
|
|
b38b0f |
|
|
|
b38b0f |
- /* If we could not get the sizes so far, we can only guess them */
|
|
|
b38b0f |
- if (!s->buf_align) {
|
|
|
b38b0f |
+ /*
|
|
|
b38b0f |
+ * If we could not get the sizes so far, we can only guess them. First try
|
|
|
b38b0f |
+ * to detect request alignment, since it is more likely to succeed. Then
|
|
|
b38b0f |
+ * try to detect buf_align, which cannot be detected in some cases (e.g.
|
|
|
b38b0f |
+ * Gluster). If buf_align cannot be detected, we fallback to the value of
|
|
|
b38b0f |
+ * request_alignment.
|
|
|
b38b0f |
+ */
|
|
|
b38b0f |
+
|
|
|
b38b0f |
+ if (!bs->bl.request_alignment) {
|
|
|
b38b0f |
+ int i;
|
|
|
b38b0f |
size_t align;
|
|
|
b38b0f |
- buf = qemu_memalign(max_align, 2 * max_align);
|
|
|
b38b0f |
- for (align = 512; align <= max_align; align <<= 1) {
|
|
|
b38b0f |
- if (raw_is_io_aligned(fd, buf + align, max_align)) {
|
|
|
b38b0f |
- s->buf_align = align;
|
|
|
b38b0f |
+ buf = qemu_memalign(max_align, max_align);
|
|
|
b38b0f |
+ for (i = 0; i < ARRAY_SIZE(alignments); i++) {
|
|
|
b38b0f |
+ align = alignments[i];
|
|
|
b38b0f |
+ if (raw_is_io_aligned(fd, buf, align)) {
|
|
|
b38b0f |
+ /* Fallback to safe value. */
|
|
|
b38b0f |
+ bs->bl.request_alignment = (align != 1) ? align : max_align;
|
|
|
b38b0f |
break;
|
|
|
b38b0f |
}
|
|
|
b38b0f |
}
|
|
|
b38b0f |
qemu_vfree(buf);
|
|
|
b38b0f |
}
|
|
|
b38b0f |
|
|
|
b38b0f |
- if (!bs->bl.request_alignment) {
|
|
|
b38b0f |
+ if (!s->buf_align) {
|
|
|
b38b0f |
+ int i;
|
|
|
b38b0f |
size_t align;
|
|
|
b38b0f |
- buf = qemu_memalign(s->buf_align, max_align);
|
|
|
b38b0f |
- for (align = 512; align <= max_align; align <<= 1) {
|
|
|
b38b0f |
- if (raw_is_io_aligned(fd, buf, align)) {
|
|
|
b38b0f |
- bs->bl.request_alignment = align;
|
|
|
b38b0f |
+ buf = qemu_memalign(max_align, 2 * max_align);
|
|
|
b38b0f |
+ for (i = 0; i < ARRAY_SIZE(alignments); i++) {
|
|
|
b38b0f |
+ align = alignments[i];
|
|
|
b38b0f |
+ if (raw_is_io_aligned(fd, buf + align, max_align)) {
|
|
|
b38b0f |
+ /* Fallback to request_aligment. */
|
|
|
b38b0f |
+ s->buf_align = (align != 1) ? align : bs->bl.request_alignment;
|
|
|
b38b0f |
break;
|
|
|
b38b0f |
}
|
|
|
b38b0f |
}
|
|
|
b38b0f |
--
|
|
|
b38b0f |
1.8.3.1
|
|
|
b38b0f |
|