Pablo Greco e6a3ae
From 25bfe4a95b02b6fefafdfa1651c50a4d0c5bc87b Mon Sep 17 00:00:00 2001
Pablo Greco e6a3ae
From: Max Reitz <mreitz@redhat.com>
Pablo Greco e6a3ae
Date: Tue, 23 Jul 2019 14:45:44 +0100
Pablo Greco e6a3ae
Subject: [PATCH 06/14] nbd/client: Support qemu-img convert from unaligned
Pablo Greco e6a3ae
 size
Pablo Greco e6a3ae
Pablo Greco e6a3ae
RH-Author: Max Reitz <mreitz@redhat.com>
Pablo Greco e6a3ae
Message-id: <20190723144546.23701-6-mreitz@redhat.com>
Pablo Greco e6a3ae
Patchwork-id: 89651
Pablo Greco e6a3ae
O-Subject: [RHEL-8.1.0 qemu-kvm PATCH 5/7] nbd/client: Support qemu-img convert from unaligned size
Pablo Greco e6a3ae
Bugzilla: 1678979
Pablo Greco e6a3ae
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
Pablo Greco e6a3ae
RH-Acked-by: Stefano Garzarella <sgarzare@redhat.com>
Pablo Greco e6a3ae
RH-Acked-by: John Snow <jsnow@redhat.com>
Pablo Greco e6a3ae
Pablo Greco e6a3ae
From: Eric Blake <eblake@redhat.com>
Pablo Greco e6a3ae
Pablo Greco e6a3ae
If an NBD server advertises a size that is not a multiple of a sector,
Pablo Greco e6a3ae
the block layer rounds up that size, even though we set info.size to
Pablo Greco e6a3ae
the exact byte value sent by the server. The block layer then proceeds
Pablo Greco e6a3ae
to let us read or query block status on the hole that it added past
Pablo Greco e6a3ae
EOF, which the NBD server is unlikely to be happy with. Fortunately,
Pablo Greco e6a3ae
qemu as a server never advertizes an unaligned size, so we generally
Pablo Greco e6a3ae
don't run into this problem; but the nbdkit server makes it easy to
Pablo Greco e6a3ae
test:
Pablo Greco e6a3ae
Pablo Greco e6a3ae
$ printf %1000d 1 > f1
Pablo Greco e6a3ae
$ ~/nbdkit/nbdkit -fv file f1 & pid=$!
Pablo Greco e6a3ae
$ qemu-img convert -f raw nbd://localhost:10809 f2
Pablo Greco e6a3ae
$ kill $pid
Pablo Greco e6a3ae
$ qemu-img compare f1 f2
Pablo Greco e6a3ae
Pablo Greco e6a3ae
Pre-patch, the server attempts a 1024-byte read, which nbdkit
Pablo Greco e6a3ae
rightfully rejects as going beyond its advertised 1000 byte size; the
Pablo Greco e6a3ae
conversion fails and the output files differ (not even the first
Pablo Greco e6a3ae
sector is copied, because qemu-img does not follow ddrescue's habit of
Pablo Greco e6a3ae
trying smaller reads to get as much information as possible in spite
Pablo Greco e6a3ae
of errors). Post-patch, the client's attempts to read (and query block
Pablo Greco e6a3ae
status, for new enough nbdkit) are properly truncated to the server's
Pablo Greco e6a3ae
length, with sane handling of the hole the block layer forced on
Pablo Greco e6a3ae
us. Although f2 ends up as a larger file (1024 bytes instead of 1000),
Pablo Greco e6a3ae
qemu-img compare shows the two images to have identical contents for
Pablo Greco e6a3ae
display to the guest.
Pablo Greco e6a3ae
Pablo Greco e6a3ae
I didn't add iotests coverage since I didn't want to add a dependency
Pablo Greco e6a3ae
on nbdkit in iotests. I also did NOT patch write, trim, or write
Pablo Greco e6a3ae
zeroes - these commands continue to fail (usually with ENOSPC, but
Pablo Greco e6a3ae
whatever the server chose), because we really can't write to the end
Pablo Greco e6a3ae
of the file, and because 'qemu-img convert' is the most common case
Pablo Greco e6a3ae
where we care about being tolerant (which is read-only). Perhaps we
Pablo Greco e6a3ae
could truncate the request if the client is writing zeros to the tail,
Pablo Greco e6a3ae
but that seems like more work, especially if the block layer is fixed
Pablo Greco e6a3ae
in 4.1 to track byte-accurate sizing (in which case this patch would
Pablo Greco e6a3ae
be reverted as unnecessary).
Pablo Greco e6a3ae
Pablo Greco e6a3ae
Signed-off-by: Eric Blake <eblake@redhat.com>
Pablo Greco e6a3ae
Message-Id: <20190329042750.14704-5-eblake@redhat.com>
Pablo Greco e6a3ae
Tested-by: Richard W.M. Jones <rjones@redhat.com>
Pablo Greco e6a3ae
(cherry picked from commit 9cf638508c0090b33ada4155c7cbb684e08e5ee9)
Pablo Greco e6a3ae
Signed-off-by: Max Reitz <mreitz@redhat.com>
Pablo Greco e6a3ae
Signed-off-by: Danilo C. L. de Paula <ddepaula@redhat.com>
Pablo Greco e6a3ae
---
Pablo Greco e6a3ae
 block/nbd-client.c | 39 ++++++++++++++++++++++++++++++++++++++-
Pablo Greco e6a3ae
 1 file changed, 38 insertions(+), 1 deletion(-)
Pablo Greco e6a3ae
Pablo Greco e6a3ae
diff --git a/block/nbd-client.c b/block/nbd-client.c
Pablo Greco e6a3ae
index 80d3625..6b33fe3 100644
Pablo Greco e6a3ae
--- a/block/nbd-client.c
Pablo Greco e6a3ae
+++ b/block/nbd-client.c
Pablo Greco e6a3ae
@@ -790,6 +790,25 @@ int nbd_client_co_preadv(BlockDriverState *bs, uint64_t offset,
Pablo Greco e6a3ae
     if (!bytes) {
Pablo Greco e6a3ae
         return 0;
Pablo Greco e6a3ae
     }
Pablo Greco e6a3ae
+    /*
Pablo Greco e6a3ae
+     * Work around the fact that the block layer doesn't do
Pablo Greco e6a3ae
+     * byte-accurate sizing yet - if the read exceeds the server's
Pablo Greco e6a3ae
+     * advertised size because the block layer rounded size up, then
Pablo Greco e6a3ae
+     * truncate the request to the server and tail-pad with zero.
Pablo Greco e6a3ae
+     */
Pablo Greco e6a3ae
+    if (offset >= client->info.size) {
Pablo Greco e6a3ae
+        assert(bytes < BDRV_SECTOR_SIZE);
Pablo Greco e6a3ae
+        qemu_iovec_memset(qiov, 0, 0, bytes);
Pablo Greco e6a3ae
+        return 0;
Pablo Greco e6a3ae
+    }
Pablo Greco e6a3ae
+    if (offset + bytes > client->info.size) {
Pablo Greco e6a3ae
+        uint64_t slop = offset + bytes - client->info.size;
Pablo Greco e6a3ae
+
Pablo Greco e6a3ae
+        assert(slop < BDRV_SECTOR_SIZE);
Pablo Greco e6a3ae
+        qemu_iovec_memset(qiov, bytes - slop, 0, slop);
Pablo Greco e6a3ae
+        request.len -= slop;
Pablo Greco e6a3ae
+    }
Pablo Greco e6a3ae
+
Pablo Greco e6a3ae
     ret = nbd_co_send_request(bs, &request, NULL);
Pablo Greco e6a3ae
     if (ret < 0) {
Pablo Greco e6a3ae
         return ret;
Pablo Greco e6a3ae
@@ -904,7 +923,8 @@ int coroutine_fn nbd_client_co_block_status(BlockDriverState *bs,
Pablo Greco e6a3ae
         .from = offset,
Pablo Greco e6a3ae
         .len = MIN(MIN_NON_ZERO(QEMU_ALIGN_DOWN(INT_MAX,
Pablo Greco e6a3ae
                                                 bs->bl.request_alignment),
Pablo Greco e6a3ae
-                                client->info.max_block), bytes),
Pablo Greco e6a3ae
+                                client->info.max_block),
Pablo Greco e6a3ae
+                   MIN(bytes, client->info.size - offset)),
Pablo Greco e6a3ae
         .flags = NBD_CMD_FLAG_REQ_ONE,
Pablo Greco e6a3ae
     };
Pablo Greco e6a3ae
 
Pablo Greco e6a3ae
@@ -913,6 +933,23 @@ int coroutine_fn nbd_client_co_block_status(BlockDriverState *bs,
Pablo Greco e6a3ae
         return BDRV_BLOCK_DATA;
Pablo Greco e6a3ae
     }
Pablo Greco e6a3ae
 
Pablo Greco e6a3ae
+    /*
Pablo Greco e6a3ae
+     * Work around the fact that the block layer doesn't do
Pablo Greco e6a3ae
+     * byte-accurate sizing yet - if the status request exceeds the
Pablo Greco e6a3ae
+     * server's advertised size because the block layer rounded size
Pablo Greco e6a3ae
+     * up, we truncated the request to the server (above), or are
Pablo Greco e6a3ae
+     * called on just the hole.
Pablo Greco e6a3ae
+     */
Pablo Greco e6a3ae
+    if (offset >= client->info.size) {
Pablo Greco e6a3ae
+        *pnum = bytes;
Pablo Greco e6a3ae
+        assert(bytes < BDRV_SECTOR_SIZE);
Pablo Greco e6a3ae
+        /* Intentionally don't report offset_valid for the hole */
Pablo Greco e6a3ae
+        return BDRV_BLOCK_ZERO;
Pablo Greco e6a3ae
+    }
Pablo Greco e6a3ae
+
Pablo Greco e6a3ae
+    if (client->info.min_block) {
Pablo Greco e6a3ae
+        assert(QEMU_IS_ALIGNED(request.len, client->info.min_block));
Pablo Greco e6a3ae
+    }
Pablo Greco e6a3ae
     ret = nbd_co_send_request(bs, &request, NULL);
Pablo Greco e6a3ae
     if (ret < 0) {
Pablo Greco e6a3ae
         return ret;
Pablo Greco e6a3ae
-- 
Pablo Greco e6a3ae
1.8.3.1
Pablo Greco e6a3ae