Blame SOURCES/kvm-file-posix-Fix-write_zeroes-with-unmap-on-block-devi.patch

383d26
From 4704bbaba38bfeb1710b109f0b4b32a839b24734 Mon Sep 17 00:00:00 2001
383d26
From: Maxim Levitsky <mlevitsk@redhat.com>
383d26
Date: Wed, 5 Jun 2019 13:57:00 +0200
383d26
Subject: [PATCH 12/23] file-posix: Fix write_zeroes with unmap on block
383d26
 devices
383d26
383d26
RH-Author: Maxim Levitsky <mlevitsk@redhat.com>
383d26
Message-id: <20190605135705.24526-5-mlevitsk@redhat.com>
383d26
Patchwork-id: 88562
383d26
O-Subject: [RHEL-7.7 qemu-kvm-rhev PATCH 4/9] file-posix: Fix write_zeroes with unmap on block devices
383d26
Bugzilla: 1648622
383d26
RH-Acked-by: Max Reitz <mreitz@redhat.com>
383d26
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
383d26
RH-Acked-by: John Snow <jsnow@redhat.com>
383d26
383d26
From: Kevin Wolf <kwolf@redhat.com>
383d26
383d26
The BLKDISCARD ioctl doesn't guarantee that the discarded blocks read as
383d26
all-zero afterwards, so don't try to abuse it for zero writing. We try
383d26
to only use this if BLKDISCARDZEROES tells us that it is safe, but this
383d26
is unreliable on older kernels and a constant 0 in newer kernels. In
383d26
other words, this code path is never actually used with newer kernels,
383d26
so we don't even try to unmap while writing zeros.
383d26
383d26
This patch removes the abuse of discard for writing zeroes from
383d26
file-posix and instead adds a new function that uses interfaces that are
383d26
actually meant to deallocate and zero out at the same time. Only if
383d26
those fail, it falls back to zeroing out without unmap. We never fall
383d26
back any more to a discard operation that may or may not result in
383d26
zeros.
383d26
383d26
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
383d26
383d26
Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=1648622
383d26
383d26
Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
383d26
(cherry picked from commit 34fa110e424e9a6a9b7e0274c3d4bfee766eb7ed)
383d26
383d26
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
383d26
---
383d26
 block/file-posix.c | 59 ++++++++++++++++++++++++++++++++++++++++--------------
383d26
 1 file changed, 44 insertions(+), 15 deletions(-)
383d26
383d26
diff --git a/block/file-posix.c b/block/file-posix.c
383d26
index 518f16b..5b93d06 100644
383d26
--- a/block/file-posix.c
383d26
+++ b/block/file-posix.c
383d26
@@ -632,7 +632,7 @@ static int raw_open_common(BlockDriverState *bs, QDict *options,
383d26
     }
383d26
 #endif
383d26
 
383d26
-    bs->supported_zero_flags = s->discard_zeroes ? BDRV_REQ_MAY_UNMAP : 0;
383d26
+    bs->supported_zero_flags = BDRV_REQ_MAY_UNMAP;
383d26
     ret = 0;
383d26
 fail:
383d26
     if (filename && (bdrv_flags & BDRV_O_TEMPORARY)) {
383d26
@@ -1530,6 +1530,35 @@ static ssize_t handle_aiocb_write_zeroes(RawPosixAIOData *aiocb)
383d26
     return -ENOTSUP;
383d26
 }
383d26
 
383d26
+static ssize_t handle_aiocb_write_zeroes_unmap(RawPosixAIOData *aiocb)
383d26
+{
383d26
+    BDRVRawState *s G_GNUC_UNUSED = aiocb->bs->opaque;
383d26
+    int ret;
383d26
+
383d26
+    /* First try to write zeros and unmap at the same time */
383d26
+
383d26
+#ifdef CONFIG_FALLOCATE_PUNCH_HOLE
383d26
+    ret = do_fallocate(s->fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
383d26
+                       aiocb->aio_offset, aiocb->aio_nbytes);
383d26
+    if (ret != -ENOTSUP) {
383d26
+        return ret;
383d26
+    }
383d26
+#endif
383d26
+
383d26
+#ifdef CONFIG_XFS
383d26
+    if (s->is_xfs) {
383d26
+        /* xfs_discard() guarantees that the discarded area reads as all-zero
383d26
+         * afterwards, so we can use it here. */
383d26
+        return xfs_discard(s, aiocb->aio_offset, aiocb->aio_nbytes);
383d26
+    }
383d26
+#endif
383d26
+
383d26
+    /* If we couldn't manage to unmap while guaranteed that the area reads as
383d26
+     * all-zero afterwards, just write zeroes without unmapping */
383d26
+    ret = handle_aiocb_write_zeroes(aiocb);
383d26
+    return ret;
383d26
+}
383d26
+
383d26
 #ifndef HAVE_COPY_FILE_RANGE
383d26
 static off_t copy_file_range(int in_fd, off_t *in_off, int out_fd,
383d26
                              off_t *out_off, size_t len, unsigned int flags)
383d26
@@ -1770,6 +1799,9 @@ static int aio_worker(void *arg)
383d26
     case QEMU_AIO_WRITE_ZEROES:
383d26
         ret = handle_aiocb_write_zeroes(aiocb);
383d26
         break;
383d26
+    case QEMU_AIO_WRITE_ZEROES | QEMU_AIO_DISCARD:
383d26
+        ret = handle_aiocb_write_zeroes_unmap(aiocb);
383d26
+        break;
383d26
     case QEMU_AIO_COPY_RANGE:
383d26
         ret = handle_aiocb_copy_range(aiocb);
383d26
         break;
383d26
@@ -2488,15 +2520,13 @@ static int coroutine_fn raw_co_pwrite_zeroes(
383d26
     int bytes, BdrvRequestFlags flags)
383d26
 {
383d26
     BDRVRawState *s = bs->opaque;
383d26
+    int operation = QEMU_AIO_WRITE_ZEROES;
383d26
 
383d26
-    if (!(flags & BDRV_REQ_MAY_UNMAP)) {
383d26
-        return paio_submit_co(bs, s->fd, offset, NULL, bytes,
383d26
-                              QEMU_AIO_WRITE_ZEROES);
383d26
-    } else if (s->discard_zeroes) {
383d26
-        return paio_submit_co(bs, s->fd, offset, NULL, bytes,
383d26
-                              QEMU_AIO_DISCARD);
383d26
+    if (flags & BDRV_REQ_MAY_UNMAP) {
383d26
+        operation |= QEMU_AIO_DISCARD;
383d26
     }
383d26
-    return -ENOTSUP;
383d26
+
383d26
+    return paio_submit_co(bs, s->fd, offset, NULL, bytes, operation);
383d26
 }
383d26
 
383d26
 static int raw_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
383d26
@@ -3061,20 +3091,19 @@ static coroutine_fn int hdev_co_pwrite_zeroes(BlockDriverState *bs,
383d26
     int64_t offset, int bytes, BdrvRequestFlags flags)
383d26
 {
383d26
     BDRVRawState *s = bs->opaque;
383d26
+    int operation = QEMU_AIO_WRITE_ZEROES | QEMU_AIO_BLKDEV;
383d26
     int rc;
383d26
 
383d26
     rc = fd_open(bs);
383d26
     if (rc < 0) {
383d26
         return rc;
383d26
     }
383d26
-    if (!(flags & BDRV_REQ_MAY_UNMAP)) {
383d26
-        return paio_submit_co(bs, s->fd, offset, NULL, bytes,
383d26
-                              QEMU_AIO_WRITE_ZEROES|QEMU_AIO_BLKDEV);
383d26
-    } else if (s->discard_zeroes) {
383d26
-        return paio_submit_co(bs, s->fd, offset, NULL, bytes,
383d26
-                              QEMU_AIO_DISCARD|QEMU_AIO_BLKDEV);
383d26
+
383d26
+    if (flags & BDRV_REQ_MAY_UNMAP) {
383d26
+        operation |= QEMU_AIO_DISCARD;
383d26
     }
383d26
-    return -ENOTSUP;
383d26
+
383d26
+    return paio_submit_co(bs, s->fd, offset, NULL, bytes, operation);
383d26
 }
383d26
 
383d26
 static int coroutine_fn hdev_co_create_opts(const char *filename, QemuOpts *opts,
383d26
-- 
383d26
1.8.3.1
383d26