Blame SOURCES/kvm-file-posix-Fix-write_zeroes-with-unmap-on-block-devi.patch

8b1478
From 4704bbaba38bfeb1710b109f0b4b32a839b24734 Mon Sep 17 00:00:00 2001
8b1478
From: Maxim Levitsky <mlevitsk@redhat.com>
8b1478
Date: Wed, 5 Jun 2019 13:57:00 +0200
8b1478
Subject: [PATCH 12/23] file-posix: Fix write_zeroes with unmap on block
8b1478
 devices
8b1478
8b1478
RH-Author: Maxim Levitsky <mlevitsk@redhat.com>
8b1478
Message-id: <20190605135705.24526-5-mlevitsk@redhat.com>
8b1478
Patchwork-id: 88562
8b1478
O-Subject: [RHEL-7.7 qemu-kvm-rhev PATCH 4/9] file-posix: Fix write_zeroes with unmap on block devices
8b1478
Bugzilla: 1648622
8b1478
RH-Acked-by: Max Reitz <mreitz@redhat.com>
8b1478
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
8b1478
RH-Acked-by: John Snow <jsnow@redhat.com>
8b1478
8b1478
From: Kevin Wolf <kwolf@redhat.com>
8b1478
8b1478
The BLKDISCARD ioctl doesn't guarantee that the discarded blocks read as
8b1478
all-zero afterwards, so don't try to abuse it for zero writing. We try
8b1478
to only use this if BLKDISCARDZEROES tells us that it is safe, but this
8b1478
is unreliable on older kernels and a constant 0 in newer kernels. In
8b1478
other words, this code path is never actually used with newer kernels,
8b1478
so we don't even try to unmap while writing zeros.
8b1478
8b1478
This patch removes the abuse of discard for writing zeroes from
8b1478
file-posix and instead adds a new function that uses interfaces that are
8b1478
actually meant to deallocate and zero out at the same time. Only if
8b1478
those fail does it fall back to zeroing out without unmap. We no longer
8b1478
fall back to a discard operation, which may or may not result in
8b1478
zeros.
8b1478
8b1478
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
8b1478
8b1478
Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=1648622
8b1478
8b1478
Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
8b1478
(Cherry picked from 34fa110e424e9a6a9b7e0274c3d4bfee766eb7ed)
8b1478
8b1478
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
8b1478
---
8b1478
 block/file-posix.c | 59 ++++++++++++++++++++++++++++++++++++++++--------------
8b1478
 1 file changed, 44 insertions(+), 15 deletions(-)
8b1478
8b1478
diff --git a/block/file-posix.c b/block/file-posix.c
8b1478
index 518f16b..5b93d06 100644
8b1478
--- a/block/file-posix.c
8b1478
+++ b/block/file-posix.c
8b1478
@@ -632,7 +632,7 @@ static int raw_open_common(BlockDriverState *bs, QDict *options,
8b1478
     }
8b1478
 #endif
8b1478
 
8b1478
-    bs->supported_zero_flags = s->discard_zeroes ? BDRV_REQ_MAY_UNMAP : 0;
8b1478
+    bs->supported_zero_flags = BDRV_REQ_MAY_UNMAP;
8b1478
     ret = 0;
8b1478
 fail:
8b1478
     if (filename && (bdrv_flags & BDRV_O_TEMPORARY)) {
8b1478
@@ -1530,6 +1530,35 @@ static ssize_t handle_aiocb_write_zeroes(RawPosixAIOData *aiocb)
8b1478
     return -ENOTSUP;
8b1478
 }
8b1478
 
8b1478
+static ssize_t handle_aiocb_write_zeroes_unmap(RawPosixAIOData *aiocb)
8b1478
+{
8b1478
+    BDRVRawState *s G_GNUC_UNUSED = aiocb->bs->opaque;
8b1478
+    int ret;
8b1478
+
8b1478
+    /* First try to write zeros and unmap at the same time */
8b1478
+
8b1478
+#ifdef CONFIG_FALLOCATE_PUNCH_HOLE
8b1478
+    ret = do_fallocate(s->fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
8b1478
+                       aiocb->aio_offset, aiocb->aio_nbytes);
8b1478
+    if (ret != -ENOTSUP) {
8b1478
+        return ret;
8b1478
+    }
8b1478
+#endif
8b1478
+
8b1478
+#ifdef CONFIG_XFS
8b1478
+    if (s->is_xfs) {
8b1478
+        /* xfs_discard() guarantees that the discarded area reads as all-zero
8b1478
+         * afterwards, so we can use it here. */
8b1478
+        return xfs_discard(s, aiocb->aio_offset, aiocb->aio_nbytes);
8b1478
+    }
8b1478
+#endif
8b1478
+
8b1478
+    /* If we could not unmap in a way that guarantees the area reads as
8b1478
+     * all-zero afterwards, just write zeroes without unmapping */
8b1478
+    ret = handle_aiocb_write_zeroes(aiocb);
8b1478
+    return ret;
8b1478
+}
8b1478
+
8b1478
 #ifndef HAVE_COPY_FILE_RANGE
8b1478
 static off_t copy_file_range(int in_fd, off_t *in_off, int out_fd,
8b1478
                              off_t *out_off, size_t len, unsigned int flags)
8b1478
@@ -1770,6 +1799,9 @@ static int aio_worker(void *arg)
8b1478
     case QEMU_AIO_WRITE_ZEROES:
8b1478
         ret = handle_aiocb_write_zeroes(aiocb);
8b1478
         break;
8b1478
+    case QEMU_AIO_WRITE_ZEROES | QEMU_AIO_DISCARD:
8b1478
+        ret = handle_aiocb_write_zeroes_unmap(aiocb);
8b1478
+        break;
8b1478
     case QEMU_AIO_COPY_RANGE:
8b1478
         ret = handle_aiocb_copy_range(aiocb);
8b1478
         break;
8b1478
@@ -2488,15 +2520,13 @@ static int coroutine_fn raw_co_pwrite_zeroes(
8b1478
     int bytes, BdrvRequestFlags flags)
8b1478
 {
8b1478
     BDRVRawState *s = bs->opaque;
8b1478
+    int operation = QEMU_AIO_WRITE_ZEROES;
8b1478
 
8b1478
-    if (!(flags & BDRV_REQ_MAY_UNMAP)) {
8b1478
-        return paio_submit_co(bs, s->fd, offset, NULL, bytes,
8b1478
-                              QEMU_AIO_WRITE_ZEROES);
8b1478
-    } else if (s->discard_zeroes) {
8b1478
-        return paio_submit_co(bs, s->fd, offset, NULL, bytes,
8b1478
-                              QEMU_AIO_DISCARD);
8b1478
+    if (flags & BDRV_REQ_MAY_UNMAP) {
8b1478
+        operation |= QEMU_AIO_DISCARD;
8b1478
     }
8b1478
-    return -ENOTSUP;
8b1478
+
8b1478
+    return paio_submit_co(bs, s->fd, offset, NULL, bytes, operation);
8b1478
 }
8b1478
 
8b1478
 static int raw_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
8b1478
@@ -3061,20 +3091,19 @@ static coroutine_fn int hdev_co_pwrite_zeroes(BlockDriverState *bs,
8b1478
     int64_t offset, int bytes, BdrvRequestFlags flags)
8b1478
 {
8b1478
     BDRVRawState *s = bs->opaque;
8b1478
+    int operation = QEMU_AIO_WRITE_ZEROES | QEMU_AIO_BLKDEV;
8b1478
     int rc;
8b1478
 
8b1478
     rc = fd_open(bs);
8b1478
     if (rc < 0) {
8b1478
         return rc;
8b1478
     }
8b1478
-    if (!(flags & BDRV_REQ_MAY_UNMAP)) {
8b1478
-        return paio_submit_co(bs, s->fd, offset, NULL, bytes,
8b1478
-                              QEMU_AIO_WRITE_ZEROES|QEMU_AIO_BLKDEV);
8b1478
-    } else if (s->discard_zeroes) {
8b1478
-        return paio_submit_co(bs, s->fd, offset, NULL, bytes,
8b1478
-                              QEMU_AIO_DISCARD|QEMU_AIO_BLKDEV);
8b1478
+
8b1478
+    if (flags & BDRV_REQ_MAY_UNMAP) {
8b1478
+        operation |= QEMU_AIO_DISCARD;
8b1478
     }
8b1478
-    return -ENOTSUP;
8b1478
+
8b1478
+    return paio_submit_co(bs, s->fd, offset, NULL, bytes, operation);
8b1478
 }
8b1478
 
8b1478
 static int coroutine_fn hdev_co_create_opts(const char *filename, QemuOpts *opts,
8b1478
-- 
8b1478
1.8.3.1
8b1478