9ae3a8
From 04981be7338ba396410cdab91612c17d6383d3a3 Mon Sep 17 00:00:00 2001
9ae3a8
From: Max Reitz <mreitz@redhat.com>
9ae3a8
Date: Tue, 18 Nov 2014 15:30:14 +0100
9ae3a8
Subject: [PATCH 35/41] block/raw-posix: Try both FIEMAP and SEEK_HOLE
9ae3a8
9ae3a8
Message-id: <1416324620-16229-2-git-send-email-mreitz@redhat.com>
9ae3a8
Patchwork-id: 62436
9ae3a8
O-Subject: [RHEL-7.1/7.0.z qemu-kvm PATCH v3 1/7] block/raw-posix: Try both FIEMAP and SEEK_HOLE
9ae3a8
Bugzilla: 1160237
9ae3a8
RH-Acked-by: Paolo Bonzini <pbonzini@redhat.com>
9ae3a8
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
9ae3a8
RH-Acked-by: Markus Armbruster <armbru@redhat.com>
9ae3a8
9ae3a8
The current version of raw-posix always uses ioctl(FS_IOC_FIEMAP) if
9ae3a8
FIEMAP is available; lseek with SEEK_HOLE/SEEK_DATA is not even
9ae3a8
compiled in in this case. However, there may be implementations which
9ae3a8
support the latter but not the former (e.g., NFSv4.2) as well as vice
9ae3a8
versa.
9ae3a8
9ae3a8
To cover both cases, try FIEMAP first (as this will return -ENOTSUP if
9ae3a8
not supported instead of returning a failsafe value (everything
9ae3a8
allocated as a single extent)) and if that does not work, fall back to
9ae3a8
SEEK_HOLE/SEEK_DATA.
9ae3a8
9ae3a8
Signed-off-by: Max Reitz <mreitz@redhat.com>
9ae3a8
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
9ae3a8
(cherry picked from commit 4f11aa8a40351b28c0e67c7276e0003b38cc46ac)
9ae3a8
9ae3a8
Signed-off-by: Max Reitz <mreitz@redhat.com>
9ae3a8
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
9ae3a8
---
9ae3a8
 block/raw-posix.c | 127 +++++++++++++++++++++++++++++++++---------------------
9ae3a8
 1 file changed, 77 insertions(+), 50 deletions(-)
9ae3a8
9ae3a8
diff --git a/block/raw-posix.c b/block/raw-posix.c
9ae3a8
index cfe7452..5f57412 100644
9ae3a8
--- a/block/raw-posix.c
9ae3a8
+++ b/block/raw-posix.c
9ae3a8
@@ -147,6 +147,9 @@ typedef struct BDRVRawState {
9ae3a8
     bool has_discard:1;
9ae3a8
     bool has_write_zeroes:1;
9ae3a8
     bool discard_zeroes:1;
9ae3a8
+#ifdef CONFIG_FIEMAP
9ae3a8
+    bool skip_fiemap;
9ae3a8
+#endif
9ae3a8
 } BDRVRawState;
9ae3a8
 
9ae3a8
 typedef struct BDRVRawReopenState {
9ae3a8
@@ -1305,53 +1308,29 @@ out:
9ae3a8
     return result;
9ae3a8
 }
9ae3a8
 
9ae3a8
-/*
9ae3a8
- * Returns true iff the specified sector is present in the disk image. Drivers
9ae3a8
- * not implementing the functionality are assumed to not support backing files,
9ae3a8
- * hence all their sectors are reported as allocated.
9ae3a8
- *
9ae3a8
- * If 'sector_num' is beyond the end of the disk image the return value is 0
9ae3a8
- * and 'pnum' is set to 0.
9ae3a8
- *
9ae3a8
- * 'pnum' is set to the number of sectors (including and immediately following
9ae3a8
- * the specified sector) that are known to be in the same
9ae3a8
- * allocated/unallocated state.
9ae3a8
- *
9ae3a8
- * 'nb_sectors' is the max value 'pnum' should be set to.  If nb_sectors goes
9ae3a8
- * beyond the end of the disk image it will be clamped.
9ae3a8
- */
9ae3a8
-static int64_t coroutine_fn raw_co_get_block_status(BlockDriverState *bs,
9ae3a8
-                                            int64_t sector_num,
9ae3a8
-                                            int nb_sectors, int *pnum)
9ae3a8
+static int64_t try_fiemap(BlockDriverState *bs, off_t start, off_t *data,
9ae3a8
+                          off_t *hole, int nb_sectors, int *pnum)
9ae3a8
 {
9ae3a8
-    off_t start, data, hole;
9ae3a8
-    int64_t ret;
9ae3a8
-
9ae3a8
-    ret = fd_open(bs);
9ae3a8
-    if (ret < 0) {
9ae3a8
-        return ret;
9ae3a8
-    }
9ae3a8
-
9ae3a8
-    start = sector_num * BDRV_SECTOR_SIZE;
9ae3a8
-    ret = BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID | start;
9ae3a8
-
9ae3a8
 #ifdef CONFIG_FIEMAP
9ae3a8
-
9ae3a8
     BDRVRawState *s = bs->opaque;
9ae3a8
+    int64_t ret = BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID | start;
9ae3a8
     struct {
9ae3a8
         struct fiemap fm;
9ae3a8
         struct fiemap_extent fe;
9ae3a8
     } f;
9ae3a8
 
9ae3a8
+    if (s->skip_fiemap) {
9ae3a8
+        return -ENOTSUP;
9ae3a8
+    }
9ae3a8
+
9ae3a8
     f.fm.fm_start = start;
9ae3a8
     f.fm.fm_length = (int64_t)nb_sectors * BDRV_SECTOR_SIZE;
9ae3a8
     f.fm.fm_flags = 0;
9ae3a8
     f.fm.fm_extent_count = 1;
9ae3a8
     f.fm.fm_reserved = 0;
9ae3a8
     if (ioctl(s->fd, FS_IOC_FIEMAP, &f) == -1) {
9ae3a8
-        /* Assume everything is allocated.  */
9ae3a8
-        *pnum = nb_sectors;
9ae3a8
-        return ret;
9ae3a8
+        s->skip_fiemap = true;
9ae3a8
+        return -errno;
9ae3a8
     }
9ae3a8
 
9ae3a8
     if (f.fm.fm_mapped_extents == 0) {
9ae3a8
@@ -1359,44 +1338,92 @@ static int64_t coroutine_fn raw_co_get_block_status(BlockDriverState *bs,
9ae3a8
          * f.fm.fm_start + f.fm.fm_length must be clamped to the file size!
9ae3a8
          */
9ae3a8
         off_t length = lseek(s->fd, 0, SEEK_END);
9ae3a8
-        hole = f.fm.fm_start;
9ae3a8
-        data = MIN(f.fm.fm_start + f.fm.fm_length, length);
9ae3a8
+        *hole = f.fm.fm_start;
9ae3a8
+        *data = MIN(f.fm.fm_start + f.fm.fm_length, length);
9ae3a8
     } else {
9ae3a8
-        data = f.fe.fe_logical;
9ae3a8
-        hole = f.fe.fe_logical + f.fe.fe_length;
9ae3a8
+        *data = f.fe.fe_logical;
9ae3a8
+        *hole = f.fe.fe_logical + f.fe.fe_length;
9ae3a8
         if (f.fe.fe_flags & FIEMAP_EXTENT_UNWRITTEN) {
9ae3a8
             ret |= BDRV_BLOCK_ZERO;
9ae3a8
         }
9ae3a8
     }
9ae3a8
 
9ae3a8
-#elif defined SEEK_HOLE && defined SEEK_DATA
9ae3a8
+    return ret;
9ae3a8
+#else
9ae3a8
+    return -ENOTSUP;
9ae3a8
+#endif
9ae3a8
+}
9ae3a8
 
9ae3a8
+static int64_t try_seek_hole(BlockDriverState *bs, off_t start, off_t *data,
9ae3a8
+                             off_t *hole, int *pnum)
9ae3a8
+{
9ae3a8
+#if defined SEEK_HOLE && defined SEEK_DATA
9ae3a8
     BDRVRawState *s = bs->opaque;
9ae3a8
 
9ae3a8
-    hole = lseek(s->fd, start, SEEK_HOLE);
9ae3a8
-    if (hole == -1) {
9ae3a8
+    *hole = lseek(s->fd, start, SEEK_HOLE);
9ae3a8
+    if (*hole == -1) {
9ae3a8
         /* -ENXIO indicates that sector_num was past the end of the file.
9ae3a8
          * There is a virtual hole there.  */
9ae3a8
         assert(errno != -ENXIO);
9ae3a8
 
9ae3a8
-        /* Most likely EINVAL.  Assume everything is allocated.  */
9ae3a8
-        *pnum = nb_sectors;
9ae3a8
-        return ret;
9ae3a8
+        return -errno;
9ae3a8
     }
9ae3a8
 
9ae3a8
-    if (hole > start) {
9ae3a8
-        data = start;
9ae3a8
+    if (*hole > start) {
9ae3a8
+        *data = start;
9ae3a8
     } else {
9ae3a8
         /* On a hole.  We need another syscall to find its end.  */
9ae3a8
-        data = lseek(s->fd, start, SEEK_DATA);
9ae3a8
-        if (data == -1) {
9ae3a8
-            data = lseek(s->fd, 0, SEEK_END);
9ae3a8
+        *data = lseek(s->fd, start, SEEK_DATA);
9ae3a8
+        if (*data == -1) {
9ae3a8
+            *data = lseek(s->fd, 0, SEEK_END);
9ae3a8
         }
9ae3a8
     }
9ae3a8
+
9ae3a8
+    return BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID | start;
9ae3a8
 #else
9ae3a8
-    data = 0;
9ae3a8
-    hole = start + nb_sectors * BDRV_SECTOR_SIZE;
9ae3a8
+    return -ENOTSUP;
9ae3a8
 #endif
9ae3a8
+}
9ae3a8
+
9ae3a8
+/*
9ae3a8
+ * Returns the allocation status flags (BDRV_BLOCK_*) of the specified sector. Drivers
9ae3a8
+ * not implementing the functionality are assumed to not support backing files,
9ae3a8
+ * hence all their sectors are reported as allocated.
9ae3a8
+ *
9ae3a8
+ * If 'sector_num' is beyond the end of the disk image the return value is 0
9ae3a8
+ * and 'pnum' is set to 0.
9ae3a8
+ *
9ae3a8
+ * 'pnum' is set to the number of sectors (including and immediately following
9ae3a8
+ * the specified sector) that are known to be in the same
9ae3a8
+ * allocated/unallocated state.
9ae3a8
+ *
9ae3a8
+ * 'nb_sectors' is the max value 'pnum' should be set to.  If nb_sectors goes
9ae3a8
+ * beyond the end of the disk image it will be clamped.
9ae3a8
+ */
9ae3a8
+static int64_t coroutine_fn raw_co_get_block_status(BlockDriverState *bs,
9ae3a8
+                                                    int64_t sector_num,
9ae3a8
+                                                    int nb_sectors, int *pnum)
9ae3a8
+{
9ae3a8
+    off_t start, data = 0, hole = 0;
9ae3a8
+    int64_t ret;
9ae3a8
+
9ae3a8
+    ret = fd_open(bs);
9ae3a8
+    if (ret < 0) {
9ae3a8
+        return ret;
9ae3a8
+    }
9ae3a8
+
9ae3a8
+    start = sector_num * BDRV_SECTOR_SIZE;
9ae3a8
+
9ae3a8
+    ret = try_fiemap(bs, start, &data, &hole, nb_sectors, pnum);
9ae3a8
+    if (ret < 0) {
9ae3a8
+        ret = try_seek_hole(bs, start, &data, &hole, pnum);
9ae3a8
+        if (ret < 0) {
9ae3a8
+            /* Assume everything is allocated. */
9ae3a8
+            data = 0;
9ae3a8
+            hole = start + nb_sectors * BDRV_SECTOR_SIZE;
9ae3a8
+            ret = BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID | start;
9ae3a8
+        }
9ae3a8
+    }
9ae3a8
 
9ae3a8
     if (data <= start) {
9ae3a8
         /* On a data extent, compute sectors to the end of the extent.  */
9ae3a8
-- 
9ae3a8
1.8.3.1
9ae3a8