diff --git a/SOURCES/kvm-block-raw-posix-Fix-disk-corruption-in-try_fiemap.patch b/SOURCES/kvm-block-raw-posix-Fix-disk-corruption-in-try_fiemap.patch new file mode 100644 index 0000000..1c7f0a4 --- /dev/null +++ b/SOURCES/kvm-block-raw-posix-Fix-disk-corruption-in-try_fiemap.patch @@ -0,0 +1,58 @@ +From d739988c668996b7ba21a87f066d25d6cc6eb578 Mon Sep 17 00:00:00 2001 +From: Max Reitz +Date: Tue, 18 Nov 2014 15:30:15 +0100 +Subject: [PATCH 2/7] block/raw-posix: Fix disk corruption in try_fiemap +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Message-id: <1416324620-16229-3-git-send-email-mreitz@redhat.com> +Patchwork-id: 62437 +O-Subject: [RHEL-7.1/7.0.z qemu-kvm PATCH v3 2/7] block/raw-posix: Fix disk corruption in try_fiemap +Bugzilla: 1166605 +RH-Acked-by: Paolo Bonzini +RH-Acked-by: Kevin Wolf +RH-Acked-by: Markus Armbruster + +From: Tony Breeds + +Using fiemap without FIEMAP_FLAG_SYNC is a known corrupter. + +Add the FIEMAP_FLAG_SYNC flag to the FS_IOC_FIEMAP ioctl. This has +the downside of significantly reducing performance. + +Reported-By: Michael Steffens +Signed-off-by: Tony Breeds +Cc: Kevin Wolf +Cc: Markus Armbruster +Cc: Stefan Hajnoczi +Cc: Max Reitz +Cc: Pádraig Brady +Cc: Eric Blake +Reviewed-by: Eric Blake +Reviewed-by: Max Reitz +Signed-off-by: Kevin Wolf +(cherry picked from commit 38c4d0aea3e1264c86e282d99560330adf2b6e25) + +Signed-off-by: Max Reitz +Signed-off-by: Miroslav Rezanina +--- + block/raw-posix.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/block/raw-posix.c b/block/raw-posix.c +index 965dda8..0d963eb 100644 +--- a/block/raw-posix.c ++++ b/block/raw-posix.c +@@ -1277,7 +1277,7 @@ static int64_t try_fiemap(BlockDriverState *bs, off_t start, off_t *data, + + f.fm.fm_start = start; + f.fm.fm_length = (int64_t)nb_sectors * BDRV_SECTOR_SIZE; +- f.fm.fm_flags = 0; ++ f.fm.fm_flags = FIEMAP_FLAG_SYNC; + f.fm.fm_extent_count = 1; + f.fm.fm_reserved = 0; + if (ioctl(s->fd, FS_IOC_FIEMAP, &f) == -1) { +-- +1.8.3.1 + diff --git a/SOURCES/kvm-block-raw-posix-Try-both-FIEMAP-and-SEEK_HOLE.patch b/SOURCES/kvm-block-raw-posix-Try-both-FIEMAP-and-SEEK_HOLE.patch new file mode 100644 index 0000000..f315fb4 --- /dev/null +++ b/SOURCES/kvm-block-raw-posix-Try-both-FIEMAP-and-SEEK_HOLE.patch @@ -0,0 +1,224 @@ +From 8f381e0473b6ce4d068048f04043097209526b02 Mon Sep 17 00:00:00 2001 +From: Max Reitz +Date: Tue, 18 Nov 2014 15:30:14 +0100 +Subject: [PATCH 1/7] block/raw-posix: Try both FIEMAP and SEEK_HOLE + +Message-id: <1416324620-16229-2-git-send-email-mreitz@redhat.com> +Patchwork-id: 62436 +O-Subject: [RHEL-7.1/7.0.z qemu-kvm PATCH v3 1/7] block/raw-posix: Try both FIEMAP and SEEK_HOLE +Bugzilla: 1166605 +RH-Acked-by: Paolo Bonzini +RH-Acked-by: Kevin Wolf +RH-Acked-by: Markus Armbruster + +The current version of raw-posix always uses ioctl(FS_IOC_FIEMAP) if +FIEMAP is available; lseek with SEEK_HOLE/SEEK_DATA are not even +compiled in in this case. However, there may be implementations which +support the latter but not the former (e.g., NFSv4.2) as well as vice +versa. + +To cover both cases, try FIEMAP first (as this will return -ENOTSUP if +not supported instead of returning a failsafe value (everything +allocated as a single extent)) and if that does not work, fall back to +SEEK_HOLE/SEEK_DATA. + +Signed-off-by: Max Reitz +Signed-off-by: Stefan Hajnoczi +(cherry picked from commit 4f11aa8a40351b28c0e67c7276e0003b38cc46ac) + +Signed-off-by: Max Reitz +Signed-off-by: Miroslav Rezanina +--- + block/raw-posix.c | 127 +++++++++++++++++++++++++++++++++--------------------- + 1 file changed, 77 insertions(+), 50 deletions(-) + +diff --git a/block/raw-posix.c b/block/raw-posix.c +index 9ee5b8e..965dda8 100644 +--- a/block/raw-posix.c ++++ b/block/raw-posix.c +@@ -146,6 +146,9 @@ typedef struct BDRVRawState { + bool has_discard:1; + bool has_write_zeroes:1; + bool discard_zeroes:1; ++#ifdef CONFIG_FIEMAP ++ bool skip_fiemap; ++#endif + } BDRVRawState; + + typedef struct BDRVRawReopenState { +@@ -1257,53 +1260,29 @@ static int raw_create(const char *filename, QEMUOptionParameter *options, + return result; + } + +-/* +- * Returns true iff the specified sector is present in the disk image. Drivers +- * not implementing the functionality are assumed to not support backing files, +- * hence all their sectors are reported as allocated. +- * +- * If 'sector_num' is beyond the end of the disk image the return value is 0 +- * and 'pnum' is set to 0. +- * +- * 'pnum' is set to the number of sectors (including and immediately following +- * the specified sector) that are known to be in the same +- * allocated/unallocated state. +- * +- * 'nb_sectors' is the max value 'pnum' should be set to. If nb_sectors goes +- * beyond the end of the disk image it will be clamped. +- */ +-static int64_t coroutine_fn raw_co_get_block_status(BlockDriverState *bs, +- int64_t sector_num, +- int nb_sectors, int *pnum) ++static int64_t try_fiemap(BlockDriverState *bs, off_t start, off_t *data, ++ off_t *hole, int nb_sectors, int *pnum) + { +- off_t start, data, hole; +- int64_t ret; +- +- ret = fd_open(bs); +- if (ret < 0) { +- return ret; +- } +- +- start = sector_num * BDRV_SECTOR_SIZE; +- ret = BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID | start; +- + #ifdef CONFIG_FIEMAP +- + BDRVRawState *s = bs->opaque; ++ int64_t ret = BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID | start; + struct { + struct fiemap fm; + struct fiemap_extent fe; + } f; + ++ if (s->skip_fiemap) { ++ return -ENOTSUP; ++ } ++ + f.fm.fm_start = start; + f.fm.fm_length = (int64_t)nb_sectors * BDRV_SECTOR_SIZE; + f.fm.fm_flags = 0; + f.fm.fm_extent_count = 1; + f.fm.fm_reserved = 0; + if (ioctl(s->fd, FS_IOC_FIEMAP, &f) == -1) { +- /* Assume everything is allocated. */ +- *pnum = nb_sectors; +- return ret; ++ s->skip_fiemap = true; ++ return -errno; + } + + if (f.fm.fm_mapped_extents == 0) { +@@ -1311,44 +1290,92 @@ static int64_t coroutine_fn raw_co_get_block_status(BlockDriverState *bs, + * f.fm.fm_start + f.fm.fm_length must be clamped to the file size! + */ + off_t length = lseek(s->fd, 0, SEEK_END); +- hole = f.fm.fm_start; +- data = MIN(f.fm.fm_start + f.fm.fm_length, length); ++ *hole = f.fm.fm_start; ++ *data = MIN(f.fm.fm_start + f.fm.fm_length, length); + } else { +- data = f.fe.fe_logical; +- hole = f.fe.fe_logical + f.fe.fe_length; ++ *data = f.fe.fe_logical; ++ *hole = f.fe.fe_logical + f.fe.fe_length; + if (f.fe.fe_flags & FIEMAP_EXTENT_UNWRITTEN) { + ret |= BDRV_BLOCK_ZERO; + } + } + +-#elif defined SEEK_HOLE && defined SEEK_DATA ++ return ret; ++#else ++ return -ENOTSUP; ++#endif ++} + ++static int64_t try_seek_hole(BlockDriverState *bs, off_t start, off_t *data, ++ off_t *hole, int *pnum) ++{ ++#if defined SEEK_HOLE && defined SEEK_DATA + BDRVRawState *s = bs->opaque; + +- hole = lseek(s->fd, start, SEEK_HOLE); +- if (hole == -1) { ++ *hole = lseek(s->fd, start, SEEK_HOLE); ++ if (*hole == -1) { + /* -ENXIO indicates that sector_num was past the end of the file. + * There is a virtual hole there. */ + assert(errno != -ENXIO); + +- /* Most likely EINVAL. Assume everything is allocated. */ +- *pnum = nb_sectors; +- return ret; ++ return -errno; + } + +- if (hole > start) { +- data = start; ++ if (*hole > start) { ++ *data = start; + } else { + /* On a hole. We need another syscall to find its end. */ +- data = lseek(s->fd, start, SEEK_DATA); +- if (data == -1) { +- data = lseek(s->fd, 0, SEEK_END); ++ *data = lseek(s->fd, start, SEEK_DATA); ++ if (*data == -1) { ++ *data = lseek(s->fd, 0, SEEK_END); + } + } ++ ++ return BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID | start; + #else +- data = 0; +- hole = start + nb_sectors * BDRV_SECTOR_SIZE; ++ return -ENOTSUP; + #endif ++} ++ ++/* ++ * Returns true iff the specified sector is present in the disk image. Drivers ++ * not implementing the functionality are assumed to not support backing files, ++ * hence all their sectors are reported as allocated. ++ * ++ * If 'sector_num' is beyond the end of the disk image the return value is 0 ++ * and 'pnum' is set to 0. ++ * ++ * 'pnum' is set to the number of sectors (including and immediately following ++ * the specified sector) that are known to be in the same ++ * allocated/unallocated state. ++ * ++ * 'nb_sectors' is the max value 'pnum' should be set to. If nb_sectors goes ++ * beyond the end of the disk image it will be clamped. ++ */ ++static int64_t coroutine_fn raw_co_get_block_status(BlockDriverState *bs, ++ int64_t sector_num, ++ int nb_sectors, int *pnum) ++{ ++ off_t start, data = 0, hole = 0; ++ int64_t ret; ++ ++ ret = fd_open(bs); ++ if (ret < 0) { ++ return ret; ++ } ++ ++ start = sector_num * BDRV_SECTOR_SIZE; ++ ++ ret = try_fiemap(bs, start, &data, &hole, nb_sectors, pnum); ++ if (ret < 0) { ++ ret = try_seek_hole(bs, start, &data, &hole, pnum); ++ if (ret < 0) { ++ /* Assume everything is allocated. */ ++ data = 0; ++ hole = start + nb_sectors * BDRV_SECTOR_SIZE; ++ ret = BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID | start; ++ } ++ } + + if (data <= start) { + /* On a data extent, compute sectors to the end of the extent. */ +-- +1.8.3.1 + diff --git a/SOURCES/kvm-block-raw-posix-use-seek_hole-ahead-of-fiemap.patch b/SOURCES/kvm-block-raw-posix-use-seek_hole-ahead-of-fiemap.patch new file mode 100644 index 0000000..f64ddf8 --- /dev/null +++ b/SOURCES/kvm-block-raw-posix-use-seek_hole-ahead-of-fiemap.patch @@ -0,0 +1,63 @@ +From aa604fc31ecd9c11e59bc586eabea7cfa9c9175e Mon Sep 17 00:00:00 2001 +From: Max Reitz +Date: Tue, 18 Nov 2014 15:30:16 +0100 +Subject: [PATCH 3/7] block/raw-posix: use seek_hole ahead of fiemap +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Message-id: <1416324620-16229-4-git-send-email-mreitz@redhat.com> +Patchwork-id: 62438 +O-Subject: [RHEL-7.1/7.0.z qemu-kvm PATCH v3 3/7] block/raw-posix: use seek_hole ahead of fiemap +Bugzilla: 1166605 +RH-Acked-by: Paolo Bonzini +RH-Acked-by: Kevin Wolf +RH-Acked-by: Markus Armbruster + +From: Tony Breeds + +try_fiemap() uses FIEMAP_FLAG_SYNC which has a significant performance +impact. + +Prefer seek_hole() over fiemap() to avoid this impact where possible. +seek_hole is more widely used and, arguably, has potential to be +optimised in the kernel. + +Reported-By: Michael Steffens +Signed-off-by: Tony Breeds +Cc: Kevin Wolf +Cc: Markus Armbruster +Cc: Stefan Hajnoczi +Cc: Max Reitz +Cc: Pádraig Brady +Cc: Eric Blake +Reviewed-by: Eric Blake +Reviewed-by: Max Reitz +Signed-off-by: Kevin Wolf +(cherry picked from commit 7c15903789953ead14a417882657d52dc0c19a24) + +Signed-off-by: Max Reitz +Signed-off-by: Miroslav Rezanina +--- + block/raw-posix.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/block/raw-posix.c b/block/raw-posix.c +index 0d963eb..47b4b71 100644 +--- a/block/raw-posix.c ++++ b/block/raw-posix.c +@@ -1366,9 +1366,9 @@ static int64_t coroutine_fn raw_co_get_block_status(BlockDriverState *bs, + + start = sector_num * BDRV_SECTOR_SIZE; + +- ret = try_fiemap(bs, start, &data, &hole, nb_sectors, pnum); ++ ret = try_seek_hole(bs, start, &data, &hole, pnum); + if (ret < 0) { +- ret = try_seek_hole(bs, start, &data, &hole, pnum); ++ ret = try_fiemap(bs, start, &data, &hole, nb_sectors, pnum); + if (ret < 0) { + /* Assume everything is allocated. */ + data = 0; +-- +1.8.3.1 + diff --git a/SOURCES/kvm-raw-posix-Fix-raw_co_get_block_status-after-EOF.patch b/SOURCES/kvm-raw-posix-Fix-raw_co_get_block_status-after-EOF.patch new file mode 100644 index 0000000..306bbca --- /dev/null +++ b/SOURCES/kvm-raw-posix-Fix-raw_co_get_block_status-after-EOF.patch @@ -0,0 +1,89 @@ +From ea2675a74ab31ec9938dd50b4954ea9d6ba5fb50 Mon Sep 17 00:00:00 2001 +From: Max Reitz +Date: Tue, 18 Nov 2014 15:30:17 +0100 +Subject: [PATCH 4/7] raw-posix: Fix raw_co_get_block_status() after EOF + +Message-id: <1416324620-16229-5-git-send-email-mreitz@redhat.com> +Patchwork-id: 62439 +O-Subject: [RHEL-7.1/7.0.z qemu-kvm PATCH v3 4/7] raw-posix: Fix raw_co_get_block_status() after EOF +Bugzilla: 1166605 +RH-Acked-by: Paolo Bonzini +RH-Acked-by: Kevin Wolf +RH-Acked-by: Markus Armbruster + +As its comment states, raw_co_get_block_status() should unconditionally +return 0 and set *pnum to 0 for after EOF. + +An assertion after lseek(..., SEEK_HOLE) tried to catch this case by +asserting that errno != -ENXIO (which would indicate a position after +the EOF); but it should be errno != ENXIO instead. Regardless of that, +there should be no such assertion at all. If bdrv_getlength() returned +an outdated value and the image has been resized outside of qemu, +lseek() will return with errno == ENXIO. Just return that value as an +error then. + +Setting *pnum to 0 and returning 0 should not be done here, as in that +case we should update the device length as well. So, from qemu's +perspective, the file has not been resized; it's just that there was an +error querying sectors beyond a certain point (the actual file size). + +Additionally, nb_sectors should be clamped against the image end. This +was probably not an issue if FIEMAP or SEEK_HOLE/SEEK_DATA worked, but +the fallback did not take this case into account. + +Reported-by: Kevin Wolf +Signed-off-by: Max Reitz +Reviewed-by: Eric Blake +Reviewed-by: Kevin Wolf +Message-id: 1414148280-17949-2-git-send-email-mreitz@redhat.com +Signed-off-by: Stefan Hajnoczi +(cherry picked from commit e6d7ec32dd315422a023ed3425fe36df8c274eeb) + +Signed-off-by: Max Reitz +Signed-off-by: Miroslav Rezanina +--- + block/raw-posix.c | 14 ++++++++++---- + 1 file changed, 10 insertions(+), 4 deletions(-) + +diff --git a/block/raw-posix.c b/block/raw-posix.c +index 47b4b71..d04e683 100644 +--- a/block/raw-posix.c ++++ b/block/raw-posix.c +@@ -1314,10 +1314,6 @@ static int64_t try_seek_hole(BlockDriverState *bs, off_t start, off_t *data, + + *hole = lseek(s->fd, start, SEEK_HOLE); + if (*hole == -1) { +- /* -ENXIO indicates that sector_num was past the end of the file. +- * There is a virtual hole there. */ +- assert(errno != -ENXIO); +- + return -errno; + } + +@@ -1357,6 +1353,7 @@ static int64_t coroutine_fn raw_co_get_block_status(BlockDriverState *bs, + int nb_sectors, int *pnum) + { + off_t start, data = 0, hole = 0; ++ int64_t total_size; + int64_t ret; + + ret = fd_open(bs); +@@ -1365,6 +1362,15 @@ static int64_t coroutine_fn raw_co_get_block_status(BlockDriverState *bs, + } + + start = sector_num * BDRV_SECTOR_SIZE; ++ total_size = bdrv_getlength(bs); ++ if (total_size < 0) { ++ return total_size; ++ } else if (start >= total_size) { ++ *pnum = 0; ++ return 0; ++ } else if (start + nb_sectors * BDRV_SECTOR_SIZE > total_size) { ++ nb_sectors = DIV_ROUND_UP(total_size - start, BDRV_SECTOR_SIZE); ++ } + + ret = try_seek_hole(bs, start, &data, &hole, pnum); + if (ret < 0) { +-- +1.8.3.1 + diff --git a/SOURCES/kvm-raw-posix-SEEK_HOLE-suffices-get-rid-of-FIEMAP.patch b/SOURCES/kvm-raw-posix-SEEK_HOLE-suffices-get-rid-of-FIEMAP.patch new file mode 100644 index 0000000..d15c01f --- /dev/null +++ b/SOURCES/kvm-raw-posix-SEEK_HOLE-suffices-get-rid-of-FIEMAP.patch @@ -0,0 +1,177 @@ +From dd6a7e436e1bfa441c6851103156828000362560 Mon Sep 17 00:00:00 2001 +From: Max Reitz +Date: Tue, 18 Nov 2014 15:30:19 +0100 +Subject: [PATCH 6/7] raw-posix: SEEK_HOLE suffices, get rid of FIEMAP + +Message-id: <1416324620-16229-7-git-send-email-mreitz@redhat.com> +Patchwork-id: 62441 +O-Subject: [RHEL-7.1/7.0.z qemu-kvm PATCH v3 6/7] raw-posix: SEEK_HOLE suffices, get rid of FIEMAP +Bugzilla: 1166605 +RH-Acked-by: Paolo Bonzini +RH-Acked-by: Kevin Wolf +RH-Acked-by: Markus Armbruster + +From: Markus Armbruster + +Commit 5500316 (May 2012) implemented raw_co_is_allocated() as +follows: + +1. If defined(CONFIG_FIEMAP), use the FS_IOC_FIEMAP ioctl + +2. Else if defined(SEEK_HOLE) && defined(SEEK_DATA), use lseek() + +3. Else pretend there are no holes + +Later on, raw_co_is_allocated() was generalized to +raw_co_get_block_status(). + +Commit 4f11aa8 (May 2014) changed it to try the three methods in order +until success, because "there may be implementations which support +[SEEK_HOLE/SEEK_DATA] but not [FIEMAP] (e.g., NFSv4.2) as well as vice +versa." + +Unfortunately, we used FIEMAP incorrectly: we lacked FIEMAP_FLAG_SYNC. +Commit 38c4d0a (Sep 2014) added it. Because that's a significant +speed hit, the next commit 7c159037 put SEEK_HOLE/SEEK_DATA first. + +As you see, the obvious use of FIEMAP is wrong, and the correct use is +slow. I guess this puts it somewhere between -7 "The obvious use is +wrong" and -10 "It's impossible to get right" on Rusty Russel's Hard +to Misuse scale[*]. + +"Fortunately", the FIEMAP code is used only when + +* SEEK_HOLE/SEEK_DATA aren't defined, but CONFIG_FIEMAP is + + Uncommon. SEEK_HOLE had no XFS implementation between 2011 (when it + was introduced for ext4 and btrfs) and 2012. + +* SEEK_HOLE/SEEK_DATA and CONFIG_FIEMAP are defined, but lseek() fails + + Unlikely. + +Thus, the FIEMAP code executes rarely. Makes it a nice hidey-hole for +bugs. Worse, bugs hiding there can theoretically bite even on a host +that has SEEK_HOLE/SEEK_DATA. + +I don't want to worry about this crap, not even theoretically. Get +rid of it. + +[*] http://ozlabs.org/~rusty/index.cgi/tech/2008-04-01.html + +Signed-off-by: Markus Armbruster +Reviewed-by: Max Reitz +Reviewed-by: Eric Blake +Signed-off-by: Max Reitz +(cherry picked from commit c4875e5b2216cf5427459e619b10f75083565792) +Signed-off-by: Miroslav Rezanina + +Conflicts: + block/raw-posix.c + +Upstream has a "needs_alignment" field in BDRVRawState, downstream does +not. + +Signed-off-by: Max Reitz +--- + block/raw-posix.c | 63 ++++--------------------------------------------------- + 1 file changed, 4 insertions(+), 59 deletions(-) + +diff --git a/block/raw-posix.c b/block/raw-posix.c +index 78b968a..3ddcd2d 100644 +--- a/block/raw-posix.c ++++ b/block/raw-posix.c +@@ -56,9 +56,6 @@ + #include + #include + #endif +-#ifdef CONFIG_FIEMAP +-#include +-#endif + #ifdef CONFIG_FALLOCATE_PUNCH_HOLE + #include + #endif +@@ -146,9 +143,6 @@ typedef struct BDRVRawState { + bool has_discard:1; + bool has_write_zeroes:1; + bool discard_zeroes:1; +-#ifdef CONFIG_FIEMAP +- bool skip_fiemap; +-#endif + } BDRVRawState; + + typedef struct BDRVRawReopenState { +@@ -1260,52 +1254,6 @@ static int raw_create(const char *filename, QEMUOptionParameter *options, + return result; + } + +-static int try_fiemap(BlockDriverState *bs, off_t start, off_t *data, +- off_t *hole, int nb_sectors) +-{ +-#ifdef CONFIG_FIEMAP +- BDRVRawState *s = bs->opaque; +- int ret = 0; +- struct { +- struct fiemap fm; +- struct fiemap_extent fe; +- } f; +- +- if (s->skip_fiemap) { +- return -ENOTSUP; +- } +- +- f.fm.fm_start = start; +- f.fm.fm_length = (int64_t)nb_sectors * BDRV_SECTOR_SIZE; +- f.fm.fm_flags = FIEMAP_FLAG_SYNC; +- f.fm.fm_extent_count = 1; +- f.fm.fm_reserved = 0; +- if (ioctl(s->fd, FS_IOC_FIEMAP, &f) == -1) { +- s->skip_fiemap = true; +- return -errno; +- } +- +- if (f.fm.fm_mapped_extents == 0) { +- /* No extents found, data is beyond f.fm.fm_start + f.fm.fm_length. +- * f.fm.fm_start + f.fm.fm_length must be clamped to the file size! +- */ +- off_t length = lseek(s->fd, 0, SEEK_END); +- *hole = f.fm.fm_start; +- *data = MIN(f.fm.fm_start + f.fm.fm_length, length); +- } else { +- *data = f.fe.fe_logical; +- *hole = f.fe.fe_logical + f.fe.fe_length; +- if (f.fe.fe_flags & FIEMAP_EXTENT_UNWRITTEN) { +- ret |= BDRV_BLOCK_ZERO; +- } +- } +- +- return ret; +-#else +- return -ENOTSUP; +-#endif +-} +- + static int try_seek_hole(BlockDriverState *bs, off_t start, off_t *data, + off_t *hole) + { +@@ -1374,13 +1322,10 @@ static int64_t coroutine_fn raw_co_get_block_status(BlockDriverState *bs, + + ret = try_seek_hole(bs, start, &data, &hole); + if (ret < 0) { +- ret = try_fiemap(bs, start, &data, &hole, nb_sectors); +- if (ret < 0) { +- /* Assume everything is allocated. */ +- data = 0; +- hole = start + nb_sectors * BDRV_SECTOR_SIZE; +- ret = 0; +- } ++ /* Assume everything is allocated. */ ++ data = 0; ++ hole = start + nb_sectors * BDRV_SECTOR_SIZE; ++ ret = 0; + } + + assert(ret >= 0); +-- +1.8.3.1 + diff --git a/SOURCES/kvm-raw-posix-The-SEEK_HOLE-code-is-flawed-rewrite-it.patch b/SOURCES/kvm-raw-posix-The-SEEK_HOLE-code-is-flawed-rewrite-it.patch new file mode 100644 index 0000000..a3ddaeb --- /dev/null +++ b/SOURCES/kvm-raw-posix-The-SEEK_HOLE-code-is-flawed-rewrite-it.patch @@ -0,0 +1,197 @@ +From f3d3993ffd2caeef594351702f42e86218f2a2fb Mon Sep 17 00:00:00 2001 +From: Max Reitz +Date: Tue, 18 Nov 2014 15:30:20 +0100 +Subject: [PATCH 7/7] raw-posix: The SEEK_HOLE code is flawed, rewrite it + +Message-id: <1416324620-16229-8-git-send-email-mreitz@redhat.com> +Patchwork-id: 62442 +O-Subject: [RHEL-7.1/7.0.z qemu-kvm PATCH v3 7/7] raw-posix: The SEEK_HOLE code is flawed, rewrite it +Bugzilla: 1166605 +RH-Acked-by: Paolo Bonzini +RH-Acked-by: Kevin Wolf +RH-Acked-by: Markus Armbruster + +From: Markus Armbruster + +On systems where SEEK_HOLE in a trailing hole seeks to EOF (Solaris, +but not Linux), try_seek_hole() reports trailing data instead. + +Additionally, unlikely lseek() failures are treated badly: + +* When SEEK_HOLE fails, try_seek_hole() reports trailing data. For + -ENXIO, there's in fact a trailing hole. Can happen only when + something truncated the file since we opened it. + +* When SEEK_HOLE succeeds, SEEK_DATA fails, and SEEK_END succeeds, + then try_seek_hole() reports a trailing hole. This is okay only + when SEEK_DATA failed with -ENXIO (which means the non-trailing hole + found by SEEK_HOLE has since become trailing somehow). For other + failures (unlikely), it's wrong. + +* When SEEK_HOLE succeeds, SEEK_DATA fails, SEEK_END fails (unlikely), + then try_seek_hole() reports bogus data [-1,start), which its caller + raw_co_get_block_status() turns into zero sectors of data. Could + theoretically lead to infinite loops in code that attempts to scan + data vs. hole forward. + +Rewrite from scratch, with very careful comments. + +Signed-off-by: Markus Armbruster +Reviewed-by: Max Reitz +Reviewed-by: Eric Blake +Signed-off-by: Max Reitz +(cherry picked from commit d1f06fe665acdd7aa7a46a5ef88172c3d7d3028e) + +Signed-off-by: Max Reitz +Signed-off-by: Miroslav Rezanina +--- + block/raw-posix.c | 111 +++++++++++++++++++++++++++++++++++++++++------------- + 1 file changed, 85 insertions(+), 26 deletions(-) + +diff --git a/block/raw-posix.c b/block/raw-posix.c +index 3ddcd2d..f2244a9 100644 +--- a/block/raw-posix.c ++++ b/block/raw-posix.c +@@ -1254,28 +1254,86 @@ static int raw_create(const char *filename, QEMUOptionParameter *options, + return result; + } + +-static int try_seek_hole(BlockDriverState *bs, off_t start, off_t *data, +- off_t *hole) ++/* ++ * Find allocation range in @bs around offset @start. ++ * May change underlying file descriptor's file offset. ++ * If @start is not in a hole, store @start in @data, and the ++ * beginning of the next hole in @hole, and return 0. ++ * If @start is in a non-trailing hole, store @start in @hole and the ++ * beginning of the next non-hole in @data, and return 0. ++ * If @start is in a trailing hole or beyond EOF, return -ENXIO. ++ * If we can't find out, return a negative errno other than -ENXIO. ++ */ ++static int find_allocation(BlockDriverState *bs, off_t start, ++ off_t *data, off_t *hole) + { + #if defined SEEK_HOLE && defined SEEK_DATA + BDRVRawState *s = bs->opaque; ++ off_t offs; + +- *hole = lseek(s->fd, start, SEEK_HOLE); +- if (*hole == -1) { +- return -errno; ++ /* ++ * SEEK_DATA cases: ++ * D1. offs == start: start is in data ++ * D2. offs > start: start is in a hole, next data at offs ++ * D3. offs < 0, errno = ENXIO: either start is in a trailing hole ++ * or start is beyond EOF ++ * If the latter happens, the file has been truncated behind ++ * our back since we opened it. All bets are off then. ++ * Treating like a trailing hole is simplest. ++ * D4. offs < 0, errno != ENXIO: we learned nothing ++ */ ++ offs = lseek(s->fd, start, SEEK_DATA); ++ if (offs < 0) { ++ return -errno; /* D3 or D4 */ ++ } ++ assert(offs >= start); ++ ++ if (offs > start) { ++ /* D2: in hole, next data at offs */ ++ *hole = start; ++ *data = offs; ++ return 0; + } + +- if (*hole > start) { ++ /* D1: in data, end not yet known */ ++ ++ /* ++ * SEEK_HOLE cases: ++ * H1. offs == start: start is in a hole ++ * If this happens here, a hole has been dug behind our back ++ * since the previous lseek(). ++ * H2. offs > start: either start is in data, next hole at offs, ++ * or start is in trailing hole, EOF at offs ++ * Linux treats trailing holes like any other hole: offs == ++ * start. Solaris seeks to EOF instead: offs > start (blech). ++ * If that happens here, a hole has been dug behind our back ++ * since the previous lseek(). ++ * H3. offs < 0, errno = ENXIO: start is beyond EOF ++ * If this happens, the file has been truncated behind our ++ * back since we opened it. Treat it like a trailing hole. ++ * H4. offs < 0, errno != ENXIO: we learned nothing ++ * Pretend we know nothing at all, i.e. "forget" about D1. ++ */ ++ offs = lseek(s->fd, start, SEEK_HOLE); ++ if (offs < 0) { ++ return -errno; /* D1 and (H3 or H4) */ ++ } ++ assert(offs >= start); ++ ++ if (offs > start) { ++ /* ++ * D1 and H2: either in data, next hole at offs, or it was in ++ * data but is now in a trailing hole. In the latter case, ++ * all bets are off. Treating it as if it there was data all ++ * the way to EOF is safe, so simply do that. ++ */ + *data = start; +- } else { +- /* On a hole. We need another syscall to find its end. */ +- *data = lseek(s->fd, start, SEEK_DATA); +- if (*data == -1) { +- *data = lseek(s->fd, 0, SEEK_END); +- } ++ *hole = offs; ++ return 0; + } + +- return 0; ++ /* D1 and H1 */ ++ return -EBUSY; + #else + return -ENOTSUP; + #endif +@@ -1320,25 +1378,26 @@ static int64_t coroutine_fn raw_co_get_block_status(BlockDriverState *bs, + nb_sectors = DIV_ROUND_UP(total_size - start, BDRV_SECTOR_SIZE); + } + +- ret = try_seek_hole(bs, start, &data, &hole); +- if (ret < 0) { +- /* Assume everything is allocated. */ +- data = 0; +- hole = start + nb_sectors * BDRV_SECTOR_SIZE; +- ret = 0; +- } +- +- assert(ret >= 0); +- +- if (data <= start) { ++ ret = find_allocation(bs, start, &data, &hole); ++ if (ret == -ENXIO) { ++ /* Trailing hole */ ++ *pnum = nb_sectors; ++ ret = BDRV_BLOCK_ZERO; ++ } else if (ret < 0) { ++ /* No info available, so pretend there are no holes */ ++ *pnum = nb_sectors; ++ ret = BDRV_BLOCK_DATA; ++ } else if (data == start) { + /* On a data extent, compute sectors to the end of the extent. */ + *pnum = MIN(nb_sectors, (hole - start) / BDRV_SECTOR_SIZE); +- return ret | BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID | start; ++ ret = BDRV_BLOCK_DATA; + } else { + /* On a hole, compute sectors to the beginning of the next extent. */ ++ assert(hole == start); + *pnum = MIN(nb_sectors, (data - start) / BDRV_SECTOR_SIZE); +- return ret | BDRV_BLOCK_ZERO | BDRV_BLOCK_OFFSET_VALID | start; ++ ret = BDRV_BLOCK_ZERO; + } ++ return ret | BDRV_BLOCK_OFFSET_VALID | start; + } + + static coroutine_fn BlockDriverAIOCB *raw_aio_discard(BlockDriverState *bs, +-- +1.8.3.1 + diff --git a/SOURCES/kvm-raw-posix-raw_co_get_block_status-return-value.patch b/SOURCES/kvm-raw-posix-raw_co_get_block_status-return-value.patch new file mode 100644 index 0000000..d72bf58 --- /dev/null +++ b/SOURCES/kvm-raw-posix-raw_co_get_block_status-return-value.patch @@ -0,0 +1,122 @@ +From 7cea0711fbd71118754947385a157ff1b72e8234 Mon Sep 17 00:00:00 2001 +From: Max Reitz +Date: Tue, 18 Nov 2014 15:30:18 +0100 +Subject: [PATCH 5/7] raw-posix: raw_co_get_block_status() return value + +Message-id: <1416324620-16229-6-git-send-email-mreitz@redhat.com> +Patchwork-id: 62440 +O-Subject: [RHEL-7.1/7.0.z qemu-kvm PATCH v3 5/7] raw-posix: raw_co_get_block_status() return value +Bugzilla: 1166605 +RH-Acked-by: Paolo Bonzini +RH-Acked-by: Kevin Wolf +RH-Acked-by: Markus Armbruster + +Instead of generating the full return value thrice in try_fiemap(), +try_seek_hole() and as a fall-back in raw_co_get_block_status() itself, +generate the value only in raw_co_get_block_status(). + +While at it, also remove the pnum parameter from try_fiemap() and +try_seek_hole(). + +Suggested-by: Kevin Wolf +Signed-off-by: Max Reitz +Reviewed-by: Eric Blake +Reviewed-by: Kevin Wolf +Message-id: 1414148280-17949-3-git-send-email-mreitz@redhat.com +Signed-off-by: Stefan Hajnoczi +(cherry picked from commit d7f62751a14d1d34c7d388431a3e403ef1fe28a5) + +Signed-off-by: Max Reitz +Signed-off-by: Miroslav Rezanina +--- + block/raw-posix.c | 28 ++++++++++++++-------------- + 1 file changed, 14 insertions(+), 14 deletions(-) + +diff --git a/block/raw-posix.c b/block/raw-posix.c +index d04e683..78b968a 100644 +--- a/block/raw-posix.c ++++ b/block/raw-posix.c +@@ -1260,12 +1260,12 @@ static int raw_create(const char *filename, QEMUOptionParameter *options, + return result; + } + +-static int64_t try_fiemap(BlockDriverState *bs, off_t start, off_t *data, +- off_t *hole, int nb_sectors, int *pnum) ++static int try_fiemap(BlockDriverState *bs, off_t start, off_t *data, ++ off_t *hole, int nb_sectors) + { + #ifdef CONFIG_FIEMAP + BDRVRawState *s = bs->opaque; +- int64_t ret = BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID | start; ++ int ret = 0; + struct { + struct fiemap fm; + struct fiemap_extent fe; +@@ -1306,8 +1306,8 @@ static int64_t try_fiemap(BlockDriverState *bs, off_t start, off_t *data, + #endif + } + +-static int64_t try_seek_hole(BlockDriverState *bs, off_t start, off_t *data, +- off_t *hole, int *pnum) ++static int try_seek_hole(BlockDriverState *bs, off_t start, off_t *data, ++ off_t *hole) + { + #if defined SEEK_HOLE && defined SEEK_DATA + BDRVRawState *s = bs->opaque; +@@ -1327,7 +1327,7 @@ static int64_t try_seek_hole(BlockDriverState *bs, off_t start, off_t *data, + } + } + +- return BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID | start; ++ return 0; + #else + return -ENOTSUP; + #endif +@@ -1354,7 +1354,7 @@ static int64_t coroutine_fn raw_co_get_block_status(BlockDriverState *bs, + { + off_t start, data = 0, hole = 0; + int64_t total_size; +- int64_t ret; ++ int ret; + + ret = fd_open(bs); + if (ret < 0) { +@@ -1372,28 +1372,28 @@ static int64_t coroutine_fn raw_co_get_block_status(BlockDriverState *bs, + nb_sectors = DIV_ROUND_UP(total_size - start, BDRV_SECTOR_SIZE); + } + +- ret = try_seek_hole(bs, start, &data, &hole, pnum); ++ ret = try_seek_hole(bs, start, &data, &hole); + if (ret < 0) { +- ret = try_fiemap(bs, start, &data, &hole, nb_sectors, pnum); ++ ret = try_fiemap(bs, start, &data, &hole, nb_sectors); + if (ret < 0) { + /* Assume everything is allocated. */ + data = 0; + hole = start + nb_sectors * BDRV_SECTOR_SIZE; +- ret = BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID | start; ++ ret = 0; + } + } + ++ assert(ret >= 0); ++ + if (data <= start) { + /* On a data extent, compute sectors to the end of the extent. */ + *pnum = MIN(nb_sectors, (hole - start) / BDRV_SECTOR_SIZE); ++ return ret | BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID | start; + } else { + /* On a hole, compute sectors to the beginning of the next extent. */ + *pnum = MIN(nb_sectors, (data - start) / BDRV_SECTOR_SIZE); +- ret &= ~BDRV_BLOCK_DATA; +- ret |= BDRV_BLOCK_ZERO; ++ return ret | BDRV_BLOCK_ZERO | BDRV_BLOCK_OFFSET_VALID | start; + } +- +- return ret; + } + + static coroutine_fn BlockDriverAIOCB *raw_aio_discard(BlockDriverState *bs, +-- +1.8.3.1 + diff --git a/SPECS/qemu-kvm.spec b/SPECS/qemu-kvm.spec index 5ac5dda..5fc3aaf 100644 --- a/SPECS/qemu-kvm.spec +++ b/SPECS/qemu-kvm.spec @@ -73,7 +73,7 @@ Obsoletes: %1 < %{obsoletes_version} \ Summary: QEMU is a FAST! processor emulator Name: %{pkgname}%{?pkgsuffix} Version: 1.5.3 -Release: 60%{?dist}.10 +Release: 60%{?dist}.11 # Epoch because we pushed a qemu-1.0 package. AIUI this can't ever be dropped Epoch: 10 License: GPLv2+ and LGPLv2+ and BSD @@ -2356,6 +2356,20 @@ Patch1150: kvm-block-extend-block-commit-to-accept-a-string-for-the.patch Patch1151: kvm-block-add-backing-file-option-to-block-stream.patch # For bz#1122925 - Maintain relative path to backing file image during live merge (block-commit) Patch1152: kvm-block-add-__com.redhat_change-backing-file-qmp-comma.patch +# For bz#1166605 - qemu-img convert intermittently corrupts output images +Patch1153: kvm-block-raw-posix-Try-both-FIEMAP-and-SEEK_HOLE.patch +# For bz#1166605 - qemu-img convert intermittently corrupts output images +Patch1154: kvm-block-raw-posix-Fix-disk-corruption-in-try_fiemap.patch +# For bz#1166605 - qemu-img convert intermittently corrupts output images +Patch1155: kvm-block-raw-posix-use-seek_hole-ahead-of-fiemap.patch +# For bz#1166605 - qemu-img convert intermittently corrupts output images +Patch1156: kvm-raw-posix-Fix-raw_co_get_block_status-after-EOF.patch +# For bz#1166605 - qemu-img convert intermittently corrupts output images +Patch1157: kvm-raw-posix-raw_co_get_block_status-return-value.patch +# For bz#1166605 - qemu-img convert intermittently corrupts output images +Patch1158: kvm-raw-posix-SEEK_HOLE-suffices-get-rid-of-FIEMAP.patch +# For bz#1166605 - qemu-img convert intermittently corrupts output images +Patch1159: kvm-raw-posix-The-SEEK_HOLE-code-is-flawed-rewrite-it.patch BuildRequires: zlib-devel @@ -3710,6 +3724,13 @@ cp %{SOURCE18} pc-bios # keep "make check" happy %patch1150 -p1 %patch1151 -p1 %patch1152 -p1 +%patch1153 -p1 +%patch1154 -p1 +%patch1155 -p1 +%patch1156 -p1 +%patch1157 -p1 +%patch1158 -p1 +%patch1159 -p1 %build buildarch="%{kvm_target}-softmmu" @@ -4126,6 +4147,17 @@ sh %{_sysconfdir}/sysconfig/modules/kvm.modules &> /dev/null || : %{_libdir}/pkgconfig/libcacard.pc %changelog +* Mon Nov 24 2014 Miroslav Rezanina - 1.5.3-60.el7_0.11 +- kvm-block-raw-posix-Try-both-FIEMAP-and-SEEK_HOLE.patch [bz#1166605] +- kvm-block-raw-posix-Fix-disk-corruption-in-try_fiemap.patch [bz#1166605] +- kvm-block-raw-posix-use-seek_hole-ahead-of-fiemap.patch [bz#1166605] +- kvm-raw-posix-Fix-raw_co_get_block_status-after-EOF.patch [bz#1166605] +- kvm-raw-posix-raw_co_get_block_status-return-value.patch [bz#1166605] +- kvm-raw-posix-SEEK_HOLE-suffices-get-rid-of-FIEMAP.patch [bz#1166605] +- kvm-raw-posix-The-SEEK_HOLE-code-is-flawed-rewrite-it.patch [bz#1166605] +- Resolves: bz#1166605 + (qemu-img convert intermittently corrupts output images) + * Mon Sep 29 2014 Miroslav Rezanina - 1.5.3-60.el7_0.10 - kvm-block-add-helper-function-to-determine-if-a-BDS-is-i.patch [bz#1122925] - kvm-block-extend-block-commit-to-accept-a-string-for-the.patch [bz#1122925]