Blame SOURCES/kvm-file-posix-Use-max-transfer-length-segment-count-onl.patch

4636b8
From 07dc43478f4d96eb1cd21a40a26262beb1ca2dd6 Mon Sep 17 00:00:00 2001
4636b8
From: Maxim Levitsky <mlevitsk@redhat.com>
4636b8
Date: Mon, 27 Apr 2020 08:39:13 +0200
4636b8
Subject: [PATCH] file-posix: Use max transfer length/segment count only for
4636b8
 SCSI passthrough
4636b8
4636b8
RH-Author: Maxim Levitsky <mlevitsk@redhat.com>
4636b8
Message-id: <20200420155959.20876-2-mlevitsk@redhat.com>
4636b8
Patchwork-id: 95748
4636b8
O-Subject: [RHEL-7.9 qemu-kvm-rhev PATCH 1/1] file-posix: Use max transfer length/segment count only for SCSI passthrough
4636b8
Bugzilla: 1819253
4636b8
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
4636b8
RH-Acked-by: Stefano Garzarella <sgarzare@redhat.com>
4636b8
RH-Acked-by: Max Reitz <mreitz@redhat.com>
4636b8
4636b8
Regular kernel block devices (/dev/sda*, /dev/nvme*, etc) don't have
4636b8
max segment size/max segment count hardware requirements exposed
4636b8
to the userspace, but rather the kernel block layer
4636b8
takes care to split the incoming requests that
4636b8
violate these requirements.
4636b8
4636b8
Allowing the kernel to do the splitting allows qemu to avoid
4636b8
various overheads that arise otherwise from this.
4636b8
4636b8
This is especially visible in nbd server,
4636b8
exposing as a raw file, a mostly empty qcow2 image over the net.
4636b8
In this case most of the reads by the remote user
4636b8
won't even hit the underlying kernel block device,
4636b8
and therefore most of the overhead will be in the
4636b8
nbd traffic which increases significantly with lower max transfer size.
4636b8
4636b8
In addition to that even for local block device
4636b8
access the performance improves a bit due to less
4636b8
traffic between qemu and the kernel when large
4636b8
transfer sizes are used (e.g. for image conversion).
4636b8
4636b8
More info can be found at:
4636b8
https://bugzilla.redhat.com/show_bug.cgi?id=1647104
4636b8
4636b8
Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
4636b8
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
4636b8
Reviewed-by: Eric Blake <eblake@redhat.com>
4636b8
Reviewed-by: Pankaj Gupta <pagupta@redhat.com>
4636b8
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
4636b8
(cherry picked from commit 867eccfed84f96b54f4a432c510a02c2ce03b430)
4636b8
Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
4636b8
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
4636b8
---
4636b8
 block/file-posix.c | 54 ++++++++++++++++++++++++++++--------------------------
4636b8
 1 file changed, 28 insertions(+), 26 deletions(-)
4636b8
4636b8
diff --git a/block/file-posix.c b/block/file-posix.c
4636b8
index 548424d..9e5cd68 100644
4636b8
--- a/block/file-posix.c
4636b8
+++ b/block/file-posix.c
4636b8
@@ -1008,15 +1008,13 @@ static void raw_reopen_abort(BDRVReopenState *state)
4636b8
     s->reopen_state = NULL;
4636b8
 }
4636b8
 
4636b8
-static int hdev_get_max_transfer_length(BlockDriverState *bs, int fd)
4636b8
+static int sg_get_max_transfer_length(int fd)
4636b8
 {
4636b8
 #ifdef BLKSECTGET
4636b8
     int max_bytes = 0;
4636b8
-    short max_sectors = 0;
4636b8
-    if (bs->sg && ioctl(fd, BLKSECTGET, &max_bytes) == 0) {
4636b8
+
4636b8
+    if (ioctl(fd, BLKSECTGET, &max_bytes) == 0) {
4636b8
         return max_bytes;
4636b8
-    } else if (!bs->sg && ioctl(fd, BLKSECTGET, &max_sectors) == 0) {
4636b8
-        return max_sectors << BDRV_SECTOR_BITS;
4636b8
     } else {
4636b8
         return -errno;
4636b8
     }
4636b8
@@ -1025,25 +1023,31 @@ static int hdev_get_max_transfer_length(BlockDriverState *bs, int fd)
4636b8
 #endif
4636b8
 }
4636b8
 
4636b8
-static int hdev_get_max_segments(const struct stat *st)
4636b8
+static int sg_get_max_segments(int fd)
4636b8
 {
4636b8
 #ifdef CONFIG_LINUX
4636b8
     char buf[32];
4636b8
     const char *end;
4636b8
-    char *sysfspath;
4636b8
+    char *sysfspath = NULL;
4636b8
     int ret;
4636b8
-    int fd = -1;
4636b8
+    int sysfd = -1;
4636b8
     long max_segments;
4636b8
+    struct stat st;
4636b8
+
4636b8
+    if (fstat(fd, &st)) {
4636b8
+        ret = -errno;
4636b8
+        goto out;
4636b8
+    }
4636b8
 
4636b8
     sysfspath = g_strdup_printf("/sys/dev/block/%u:%u/queue/max_segments",
4636b8
-                                major(st->st_rdev), minor(st->st_rdev));
4636b8
-    fd = open(sysfspath, O_RDONLY);
4636b8
-    if (fd == -1) {
4636b8
+                                major(st.st_rdev), minor(st.st_rdev));
4636b8
+    sysfd = open(sysfspath, O_RDONLY);
4636b8
+    if (sysfd == -1) {
4636b8
         ret = -errno;
4636b8
         goto out;
4636b8
     }
4636b8
     do {
4636b8
-        ret = read(fd, buf, sizeof(buf) - 1);
4636b8
+        ret = read(sysfd, buf, sizeof(buf) - 1);
4636b8
     } while (ret == -1 && errno == EINTR);
4636b8
     if (ret < 0) {
4636b8
         ret = -errno;
4636b8
@@ -1060,8 +1064,8 @@ static int hdev_get_max_segments(const struct stat *st)
4636b8
     }
4636b8
 
4636b8
 out:
4636b8
-    if (fd != -1) {
4636b8
-        close(fd);
4636b8
+    if (sysfd != -1) {
4636b8
+        close(sysfd);
4636b8
     }
4636b8
     g_free(sysfspath);
4636b8
     return ret;
4636b8
@@ -1073,19 +1077,17 @@ out:
4636b8
 static void raw_refresh_limits(BlockDriverState *bs, Error **errp)
4636b8
 {
4636b8
     BDRVRawState *s = bs->opaque;
4636b8
-    struct stat st;
4636b8
 
4636b8
-    if (!fstat(s->fd, &st)) {
4636b8
-        if (S_ISBLK(st.st_mode) || S_ISCHR(st.st_mode)) {
4636b8
-            int ret = hdev_get_max_transfer_length(bs, s->fd);
4636b8
-            if (ret > 0 && ret <= BDRV_REQUEST_MAX_BYTES) {
4636b8
-                bs->bl.max_transfer = pow2floor(ret);
4636b8
-            }
4636b8
-            ret = hdev_get_max_segments(&st);
4636b8
-            if (ret > 0) {
4636b8
-                bs->bl.max_transfer = MIN(bs->bl.max_transfer,
4636b8
-                                          ret * getpagesize());
4636b8
-            }
4636b8
+    if (bs->sg) {
4636b8
+        int ret = sg_get_max_transfer_length(s->fd);
4636b8
+
4636b8
+        if (ret > 0 && ret <= BDRV_REQUEST_MAX_BYTES) {
4636b8
+            bs->bl.max_transfer = pow2floor(ret);
4636b8
+        }
4636b8
+
4636b8
+        ret = sg_get_max_segments(s->fd);
4636b8
+        if (ret > 0) {
4636b8
+            bs->bl.max_transfer = MIN(bs->bl.max_transfer, ret * getpagesize());
4636b8
         }
4636b8
     }
4636b8
 
4636b8
-- 
4636b8
1.8.3.1
4636b8