|
|
8b1478 |
From 4704bbaba38bfeb1710b109f0b4b32a839b24734 Mon Sep 17 00:00:00 2001
|
|
|
8b1478 |
From: Maxim Levitsky <mlevitsk@redhat.com>
|
|
|
8b1478 |
Date: Wed, 5 Jun 2019 13:57:00 +0200
|
|
|
8b1478 |
Subject: [PATCH 12/23] file-posix: Fix write_zeroes with unmap on block
|
|
|
8b1478 |
devices
|
|
|
8b1478 |
|
|
|
8b1478 |
RH-Author: Maxim Levitsky <mlevitsk@redhat.com>
|
|
|
8b1478 |
Message-id: <20190605135705.24526-5-mlevitsk@redhat.com>
|
|
|
8b1478 |
Patchwork-id: 88562
|
|
|
8b1478 |
O-Subject: [RHEL-7.7 qemu-kvm-rhev PATCH 4/9] file-posix: Fix write_zeroes with unmap on block devices
|
|
|
8b1478 |
Bugzilla: 1648622
|
|
|
8b1478 |
RH-Acked-by: Max Reitz <mreitz@redhat.com>
|
|
|
8b1478 |
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
|
|
|
8b1478 |
RH-Acked-by: John Snow <jsnow@redhat.com>
|
|
|
8b1478 |
|
|
|
8b1478 |
From: Kevin Wolf <kwolf@redhat.com>
|
|
|
8b1478 |
|
|
|
8b1478 |
The BLKDISCARD ioctl doesn't guarantee that the discarded blocks read as
|
|
|
8b1478 |
all-zero afterwards, so don't try to abuse it for zero writing. We try
|
|
|
8b1478 |
to only use this if BLKDISCARDZEROES tells us that it is safe, but its result
|
|
|
8b1478 |
is unreliable on older kernels and a constant 0 in newer kernels. In
|
|
|
8b1478 |
other words, this code path is never actually used with newer kernels,
|
|
|
8b1478 |
so we don't even try to unmap while writing zeros.
|
|
|
8b1478 |
|
|
|
8b1478 |
This patch removes the abuse of discard for writing zeroes from
|
|
|
8b1478 |
file-posix and instead adds a new function that uses interfaces that are
|
|
|
8b1478 |
actually meant to deallocate and zero out at the same time. Only if
|
|
|
8b1478 |
those fail, it falls back to zeroing out without unmap. We never fall
|
|
|
8b1478 |
back any more to a discard operation that may or may not result in
|
|
|
8b1478 |
zeros.
|
|
|
8b1478 |
|
|
|
8b1478 |
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
|
|
8b1478 |
|
|
|
8b1478 |
Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=1648622
|
|
|
8b1478 |
|
|
|
8b1478 |
Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
|
|
|
8b1478 |
(Cherry picked from 34fa110e424e9a6a9b7e0274c3d4bfee766eb7ed)
|
|
|
8b1478 |
|
|
|
8b1478 |
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
|
|
|
8b1478 |
---
|
|
|
8b1478 |
block/file-posix.c | 59 ++++++++++++++++++++++++++++++++++++++++--------------
|
|
|
8b1478 |
1 file changed, 44 insertions(+), 15 deletions(-)
|
|
|
8b1478 |
|
|
|
8b1478 |
diff --git a/block/file-posix.c b/block/file-posix.c
|
|
|
8b1478 |
index 518f16b..5b93d06 100644
|
|
|
8b1478 |
--- a/block/file-posix.c
|
|
|
8b1478 |
+++ b/block/file-posix.c
|
|
|
8b1478 |
@@ -632,7 +632,7 @@ static int raw_open_common(BlockDriverState *bs, QDict *options,
|
|
|
8b1478 |
}
|
|
|
8b1478 |
#endif
|
|
|
8b1478 |
|
|
|
8b1478 |
- bs->supported_zero_flags = s->discard_zeroes ? BDRV_REQ_MAY_UNMAP : 0;
|
|
|
8b1478 |
+ bs->supported_zero_flags = BDRV_REQ_MAY_UNMAP;
|
|
|
8b1478 |
ret = 0;
|
|
|
8b1478 |
fail:
|
|
|
8b1478 |
if (filename && (bdrv_flags & BDRV_O_TEMPORARY)) {
|
|
|
8b1478 |
@@ -1530,6 +1530,35 @@ static ssize_t handle_aiocb_write_zeroes(RawPosixAIOData *aiocb)
|
|
|
8b1478 |
return -ENOTSUP;
|
|
|
8b1478 |
}
|
|
|
8b1478 |
|
|
|
8b1478 |
+static ssize_t handle_aiocb_write_zeroes_unmap(RawPosixAIOData *aiocb)
|
|
|
8b1478 |
+{
|
|
|
8b1478 |
+ BDRVRawState *s G_GNUC_UNUSED = aiocb->bs->opaque;
|
|
|
8b1478 |
+ int ret;
|
|
|
8b1478 |
+
|
|
|
8b1478 |
+ /* First try to write zeros and unmap at the same time */
|
|
|
8b1478 |
+
|
|
|
8b1478 |
+#ifdef CONFIG_FALLOCATE_PUNCH_HOLE
|
|
|
8b1478 |
+ ret = do_fallocate(s->fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
|
|
|
8b1478 |
+ aiocb->aio_offset, aiocb->aio_nbytes);
|
|
|
8b1478 |
+ if (ret != -ENOTSUP) {
|
|
|
8b1478 |
+ return ret;
|
|
|
8b1478 |
+ }
|
|
|
8b1478 |
+#endif
|
|
|
8b1478 |
+
|
|
|
8b1478 |
+#ifdef CONFIG_XFS
|
|
|
8b1478 |
+ if (s->is_xfs) {
|
|
|
8b1478 |
+ /* xfs_discard() guarantees that the discarded area reads as all-zero
|
|
|
8b1478 |
+ * afterwards, so we can use it here. */
|
|
|
8b1478 |
+ return xfs_discard(s, aiocb->aio_offset, aiocb->aio_nbytes);
|
|
|
8b1478 |
+ }
|
|
|
8b1478 |
+#endif
|
|
|
8b1478 |
+
|
|
|
8b1478 |
+ /* If we couldn't manage to unmap while guaranteeing that the area reads as
|
|
|
8b1478 |
+ * all-zero afterwards, just write zeroes without unmapping */
|
|
|
8b1478 |
+ ret = handle_aiocb_write_zeroes(aiocb);
|
|
|
8b1478 |
+ return ret;
|
|
|
8b1478 |
+}
|
|
|
8b1478 |
+
|
|
|
8b1478 |
#ifndef HAVE_COPY_FILE_RANGE
|
|
|
8b1478 |
static off_t copy_file_range(int in_fd, off_t *in_off, int out_fd,
|
|
|
8b1478 |
off_t *out_off, size_t len, unsigned int flags)
|
|
|
8b1478 |
@@ -1770,6 +1799,9 @@ static int aio_worker(void *arg)
|
|
|
8b1478 |
case QEMU_AIO_WRITE_ZEROES:
|
|
|
8b1478 |
ret = handle_aiocb_write_zeroes(aiocb);
|
|
|
8b1478 |
break;
|
|
|
8b1478 |
+ case QEMU_AIO_WRITE_ZEROES | QEMU_AIO_DISCARD:
|
|
|
8b1478 |
+ ret = handle_aiocb_write_zeroes_unmap(aiocb);
|
|
|
8b1478 |
+ break;
|
|
|
8b1478 |
case QEMU_AIO_COPY_RANGE:
|
|
|
8b1478 |
ret = handle_aiocb_copy_range(aiocb);
|
|
|
8b1478 |
break;
|
|
|
8b1478 |
@@ -2488,15 +2520,13 @@ static int coroutine_fn raw_co_pwrite_zeroes(
|
|
|
8b1478 |
int bytes, BdrvRequestFlags flags)
|
|
|
8b1478 |
{
|
|
|
8b1478 |
BDRVRawState *s = bs->opaque;
|
|
|
8b1478 |
+ int operation = QEMU_AIO_WRITE_ZEROES;
|
|
|
8b1478 |
|
|
|
8b1478 |
- if (!(flags & BDRV_REQ_MAY_UNMAP)) {
|
|
|
8b1478 |
- return paio_submit_co(bs, s->fd, offset, NULL, bytes,
|
|
|
8b1478 |
- QEMU_AIO_WRITE_ZEROES);
|
|
|
8b1478 |
- } else if (s->discard_zeroes) {
|
|
|
8b1478 |
- return paio_submit_co(bs, s->fd, offset, NULL, bytes,
|
|
|
8b1478 |
- QEMU_AIO_DISCARD);
|
|
|
8b1478 |
+ if (flags & BDRV_REQ_MAY_UNMAP) {
|
|
|
8b1478 |
+ operation |= QEMU_AIO_DISCARD;
|
|
|
8b1478 |
}
|
|
|
8b1478 |
- return -ENOTSUP;
|
|
|
8b1478 |
+
|
|
|
8b1478 |
+ return paio_submit_co(bs, s->fd, offset, NULL, bytes, operation);
|
|
|
8b1478 |
}
|
|
|
8b1478 |
|
|
|
8b1478 |
static int raw_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
|
|
|
8b1478 |
@@ -3061,20 +3091,19 @@ static coroutine_fn int hdev_co_pwrite_zeroes(BlockDriverState *bs,
|
|
|
8b1478 |
int64_t offset, int bytes, BdrvRequestFlags flags)
|
|
|
8b1478 |
{
|
|
|
8b1478 |
BDRVRawState *s = bs->opaque;
|
|
|
8b1478 |
+ int operation = QEMU_AIO_WRITE_ZEROES | QEMU_AIO_BLKDEV;
|
|
|
8b1478 |
int rc;
|
|
|
8b1478 |
|
|
|
8b1478 |
rc = fd_open(bs);
|
|
|
8b1478 |
if (rc < 0) {
|
|
|
8b1478 |
return rc;
|
|
|
8b1478 |
}
|
|
|
8b1478 |
- if (!(flags & BDRV_REQ_MAY_UNMAP)) {
|
|
|
8b1478 |
- return paio_submit_co(bs, s->fd, offset, NULL, bytes,
|
|
|
8b1478 |
- QEMU_AIO_WRITE_ZEROES|QEMU_AIO_BLKDEV);
|
|
|
8b1478 |
- } else if (s->discard_zeroes) {
|
|
|
8b1478 |
- return paio_submit_co(bs, s->fd, offset, NULL, bytes,
|
|
|
8b1478 |
- QEMU_AIO_DISCARD|QEMU_AIO_BLKDEV);
|
|
|
8b1478 |
+
|
|
|
8b1478 |
+ if (flags & BDRV_REQ_MAY_UNMAP) {
|
|
|
8b1478 |
+ operation |= QEMU_AIO_DISCARD;
|
|
|
8b1478 |
}
|
|
|
8b1478 |
- return -ENOTSUP;
|
|
|
8b1478 |
+
|
|
|
8b1478 |
+ return paio_submit_co(bs, s->fd, offset, NULL, bytes, operation);
|
|
|
8b1478 |
}
|
|
|
8b1478 |
|
|
|
8b1478 |
static int coroutine_fn hdev_co_create_opts(const char *filename, QemuOpts *opts,
|
|
|
8b1478 |
--
|
|
|
8b1478 |
1.8.3.1
|
|
|
8b1478 |
|