|
|
8b1478 |
From 29c28722af50333db06c4c2497852896cf15ca23 Mon Sep 17 00:00:00 2001
|
|
|
8b1478 |
From: Max Reitz <mreitz@redhat.com>
|
|
|
8b1478 |
Date: Fri, 23 Aug 2019 15:11:10 +0200
|
|
|
8b1478 |
Subject: [PATCH 3/4] file-posix: Handle undetectable alignment
|
|
|
8b1478 |
|
|
|
8b1478 |
RH-Author: Max Reitz <mreitz@redhat.com>
|
|
|
8b1478 |
Message-id: <20190823151110.17322-2-mreitz@redhat.com>
|
|
|
8b1478 |
Patchwork-id: 90139
|
|
|
8b1478 |
O-Subject: [RHEL-7.7.z qemu-kvm-rhev PATCH 1/1] file-posix: Handle undetectable alignment
|
|
|
8b1478 |
Bugzilla: 1743365
|
|
|
8b1478 |
RH-Acked-by: Maxim Levitsky <mlevitsk@redhat.com>
|
|
|
8b1478 |
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
|
|
|
8b1478 |
RH-Acked-by: Stefano Garzarella <sgarzare@redhat.com>
|
|
|
8b1478 |
|
|
|
8b1478 |
From: Nir Soffer <nirsof@gmail.com>
|
|
|
8b1478 |
|
|
|
8b1478 |
In some cases buf_align or request_alignment cannot be detected:
|
|
|
8b1478 |
|
|
|
8b1478 |
1. With Gluster, buf_align cannot be detected since the actual I/O is
|
|
|
8b1478 |
done on Gluster server, and qemu buffer alignment does not matter.
|
|
|
8b1478 |
Since we don't have alignment requirement, buf_align=1 is the best
|
|
|
8b1478 |
value.
|
|
|
8b1478 |
|
|
|
8b1478 |
2. With local XFS filesystem, buf_align cannot be detected if reading
|
|
|
8b1478 |
from unallocated area. In this case we must align the buffer, but we don't
|
|
|
8b1478 |
know what the correct size is. Using the wrong alignment results in
|
|
|
8b1478 |
I/O error.
|
|
|
8b1478 |
|
|
|
8b1478 |
3. With Gluster backed by XFS, request_alignment cannot be detected if
|
|
|
8b1478 |
reading from unallocated area. In this case we need to use the
|
|
|
8b1478 |
correct alignment, and failing to do so results in I/O errors.
|
|
|
8b1478 |
|
|
|
8b1478 |
4. With NFS, the server does not use direct I/O, so both buf_align and request_alignment cannot
|
|
|
8b1478 |
be detected. In this case we don't need any alignment so we can use
|
|
|
8b1478 |
buf_align=1 and request_alignment=1.
|
|
|
8b1478 |
|
|
|
8b1478 |
These cases seem to work when storage sector size is 512 bytes, because
|
|
|
8b1478 |
the current code starts checking align=512. If the check succeeds
|
|
|
8b1478 |
because alignment cannot be detected we use 512. But this does not work
|
|
|
8b1478 |
for storage with 4k sector size.
|
|
|
8b1478 |
|
|
|
8b1478 |
To determine if we can detect the alignment, we probe first with
|
|
|
8b1478 |
align=1. If probing succeeds, maybe there are no alignment requirements
|
|
|
8b1478 |
(cases 1, 4) or we are probing unallocated area (cases 2, 3). Since we
|
|
|
8b1478 |
don't have any way to tell, we treat this as undetectable alignment. If
|
|
|
8b1478 |
probing with align=1 fails with EINVAL, but probing with one of the
|
|
|
8b1478 |
expected alignments succeeds, we know that we found a working alignment.
|
|
|
8b1478 |
|
|
|
8b1478 |
Practically the alignment requirements are the same for buffer
|
|
|
8b1478 |
alignment, buffer length, and offset in file. So in case we cannot
|
|
|
8b1478 |
detect buf_align, we can use request alignment. If we cannot detect
|
|
|
8b1478 |
request alignment, we can fall back to a safe value. To use this logic,
|
|
|
8b1478 |
we first probe request alignment instead of buf_align.
|
|
|
8b1478 |
|
|
|
8b1478 |
Here is a table showing the behaviour with current code (the value in
|
|
|
8b1478 |
parenthesis is the optimal value).
|
|
|
8b1478 |
|
|
|
8b1478 |
Case Sector buf_align (opt) request_alignment (opt) result
|
|
|
8b1478 |
|
|
|
8b1478 |
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
|
|
|
8b1478 |
---
|
|
|
8b1478 |
block/file-posix.c | 36 +++++++++++++++++++++++++-----------
|
|
|
8b1478 |
1 file changed, 25 insertions(+), 11 deletions(-)
|
|
|
8b1478 |
|
|
|
8b1478 |
diff --git a/block/file-posix.c b/block/file-posix.c
|
|
|
8b1478 |
index d1926b3..548424d 100644
|
|
|
8b1478 |
--- a/block/file-posix.c
|
|
|
8b1478 |
+++ b/block/file-posix.c
|
|
|
8b1478 |
@@ -325,6 +325,7 @@ static void raw_probe_alignment(BlockDriverState *bs, int fd, Error **errp)
|
|
|
8b1478 |
BDRVRawState *s = bs->opaque;
|
|
|
8b1478 |
char *buf;
|
|
|
8b1478 |
size_t max_align = MAX(MAX_BLOCKSIZE, getpagesize());
|
|
|
8b1478 |
+ size_t alignments[] = {1, 512, 1024, 2048, 4096};
|
|
|
8b1478 |
|
|
|
8b1478 |
/* For SCSI generic devices the alignment is not really used.
|
|
|
8b1478 |
With buffered I/O, we don't have any restrictions. */
|
|
|
8b1478 |
@@ -351,25 +352,38 @@ static void raw_probe_alignment(BlockDriverState *bs, int fd, Error **errp)
|
|
|
8b1478 |
}
|
|
|
8b1478 |
#endif
|
|
|
8b1478 |
|
|
|
8b1478 |
- /* If we could not get the sizes so far, we can only guess them */
|
|
|
8b1478 |
- if (!s->buf_align) {
|
|
|
8b1478 |
+ /*
|
|
|
8b1478 |
+ * If we could not get the sizes so far, we can only guess them. First try
|
|
|
8b1478 |
+ * to detect request alignment, since it is more likely to succeed. Then
|
|
|
8b1478 |
+ * try to detect buf_align, which cannot be detected in some cases (e.g.
|
|
|
8b1478 |
+ * Gluster). If buf_align cannot be detected, we fallback to the value of
|
|
|
8b1478 |
+ * request_alignment.
|
|
|
8b1478 |
+ */
|
|
|
8b1478 |
+
|
|
|
8b1478 |
+ if (!bs->bl.request_alignment) {
|
|
|
8b1478 |
+ int i;
|
|
|
8b1478 |
size_t align;
|
|
|
8b1478 |
- buf = qemu_memalign(max_align, 2 * max_align);
|
|
|
8b1478 |
- for (align = 512; align <= max_align; align <<= 1) {
|
|
|
8b1478 |
- if (raw_is_io_aligned(fd, buf + align, max_align)) {
|
|
|
8b1478 |
- s->buf_align = align;
|
|
|
8b1478 |
+ buf = qemu_memalign(max_align, max_align);
|
|
|
8b1478 |
+ for (i = 0; i < ARRAY_SIZE(alignments); i++) {
|
|
|
8b1478 |
+ align = alignments[i];
|
|
|
8b1478 |
+ if (raw_is_io_aligned(fd, buf, align)) {
|
|
|
8b1478 |
+ /* Fallback to safe value. */
|
|
|
8b1478 |
+ bs->bl.request_alignment = (align != 1) ? align : max_align;
|
|
|
8b1478 |
break;
|
|
|
8b1478 |
}
|
|
|
8b1478 |
}
|
|
|
8b1478 |
qemu_vfree(buf);
|
|
|
8b1478 |
}
|
|
|
8b1478 |
|
|
|
8b1478 |
- if (!bs->bl.request_alignment) {
|
|
|
8b1478 |
+ if (!s->buf_align) {
|
|
|
8b1478 |
+ int i;
|
|
|
8b1478 |
size_t align;
|
|
|
8b1478 |
- buf = qemu_memalign(s->buf_align, max_align);
|
|
|
8b1478 |
- for (align = 512; align <= max_align; align <<= 1) {
|
|
|
8b1478 |
- if (raw_is_io_aligned(fd, buf, align)) {
|
|
|
8b1478 |
- bs->bl.request_alignment = align;
|
|
|
8b1478 |
+ buf = qemu_memalign(max_align, 2 * max_align);
|
|
|
8b1478 |
+ for (i = 0; i < ARRAY_SIZE(alignments); i++) {
|
|
|
8b1478 |
+ align = alignments[i];
|
|
|
8b1478 |
+ if (raw_is_io_aligned(fd, buf + align, max_align)) {
|
|
|
8b1478 |
+ /* Fallback to request_aligment. */
|
|
|
8b1478 |
+ s->buf_align = (align != 1) ? align : bs->bl.request_alignment;
|
|
|
8b1478 |
break;
|
|
|
8b1478 |
}
|
|
|
8b1478 |
}
|
|
|
8b1478 |
--
|
|
|
8b1478 |
1.8.3.1
|
|
|
8b1478 |
|