yeahuh / rpms / qemu-kvm

Forked from rpms/qemu-kvm 2 years ago
Clone

Blame SOURCES/kvm-vfio-ccw-pci-Allow-devices-to-opt-in-for-ballooning.patch

ae23c9
From f37a1e337dd62c873f18aabd31863c8df144c7ea Mon Sep 17 00:00:00 2001
ae23c9
From: Alex Williamson <alex.williamson@redhat.com>
ae23c9
Date: Mon, 3 Dec 2018 22:01:54 +0000
ae23c9
Subject: [PATCH 13/16] vfio/ccw/pci: Allow devices to opt-in for ballooning
ae23c9
ae23c9
RH-Author: Alex Williamson <alex.williamson@redhat.com>
ae23c9
Message-id: <154387451469.27651.8657130146789267501.stgit@gimli.home>
ae23c9
Patchwork-id: 83236
ae23c9
O-Subject: [RHEL-8.0 qemu-kvm PATCH 4/7] vfio/ccw/pci: Allow devices to opt-in for ballooning
ae23c9
Bugzilla: 1650272
ae23c9
RH-Acked-by: Peter Xu <peterx@redhat.com>
ae23c9
RH-Acked-by: Auger Eric <eric.auger@redhat.com>
ae23c9
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
ae23c9
RH-Acked-by: David Hildenbrand <david@redhat.com>
ae23c9
ae23c9
Bugzilla: 1650272
ae23c9
ae23c9
If a vfio assigned device makes use of a physical IOMMU, then memory
ae23c9
ballooning is necessarily inhibited due to the page pinning, lack of
ae23c9
page level granularity at the IOMMU, and insufficient notifiers to both
ae23c9
remove the page on balloon inflation and add it back on deflation.
ae23c9
However, not all devices are backed by a physical IOMMU.  In the case
ae23c9
of mediated devices, if a vendor driver is well synchronized with the
ae23c9
guest driver, such that only pages actively used by the guest driver
ae23c9
are pinned by the host mdev vendor driver, then there should be no
ae23c9
overlap between pages available for the balloon driver and pages
ae23c9
actively in use by the device.  Under these conditions, ballooning
ae23c9
should be safe.
ae23c9
ae23c9
vfio-ccw devices are always mediated devices and always operate under
ae23c9
the constraints above.  Therefore we can consider all vfio-ccw devices
ae23c9
as balloon compatible.
ae23c9
ae23c9
The situation is far from straightforward with vfio-pci.  These
ae23c9
devices can be physical devices with physical IOMMU backing or
ae23c9
mediated devices where it is unknown whether a physical IOMMU is in
ae23c9
use or whether the vendor driver is well synchronized to the working
ae23c9
set of the guest driver.  The safest approach is therefore to assume
ae23c9
all vfio-pci devices are incompatible with ballooning, but allow user
ae23c9
opt-in should they have further insight into mediated devices.
ae23c9
ae23c9
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
ae23c9
(cherry picked from commit 238e91728503d400e1c4e644e3a9b80f9e621682)
ae23c9
Signed-off-by: Danilo C. L. de Paula <ddepaula@redhat.com>
ae23c9
---
ae23c9
 hw/vfio/ccw.c                 |  9 +++++++++
ae23c9
 hw/vfio/common.c              | 23 ++++++++++++++++++++++-
ae23c9
 hw/vfio/pci.c                 | 26 +++++++++++++++++++++++++-
ae23c9
 hw/vfio/trace-events          |  1 +
ae23c9
 include/hw/vfio/vfio-common.h |  2 ++
ae23c9
 5 files changed, 59 insertions(+), 2 deletions(-)
ae23c9
ae23c9
diff --git a/hw/vfio/ccw.c b/hw/vfio/ccw.c
ae23c9
index fe34b50..0c74dda 100644
ae23c9
--- a/hw/vfio/ccw.c
ae23c9
+++ b/hw/vfio/ccw.c
ae23c9
@@ -362,6 +362,15 @@ static void vfio_ccw_realize(DeviceState *dev, Error **errp)
ae23c9
         }
ae23c9
     }
ae23c9
 
ae23c9
+    /*
ae23c9
+     * All vfio-ccw devices are believed to operate in a way compatible with
ae23c9
+     * memory ballooning, i.e. pages pinned in the host are in the current
ae23c9
+     * working set of the guest driver and therefore never overlap with pages
ae23c9
+     * available to the guest balloon driver.  This needs to be set before
ae23c9
+     * vfio_get_device() for vfio common to handle the balloon inhibitor.
ae23c9
+     */
ae23c9
+    vcdev->vdev.balloon_allowed = true;
ae23c9
+
ae23c9
     if (vfio_get_device(group, cdev->mdevid, &vcdev->vdev, &err)) {
ae23c9
         g_free(vcdev->vdev.name);
ae23c9
         goto out_device_err;
ae23c9
diff --git a/hw/vfio/common.c b/hw/vfio/common.c
ae23c9
index 7e8f289..cda2d1f 100644
ae23c9
--- a/hw/vfio/common.c
ae23c9
+++ b/hw/vfio/common.c
ae23c9
@@ -1376,7 +1376,9 @@ void vfio_put_group(VFIOGroup *group)
ae23c9
         return;
ae23c9
     }
ae23c9
 
ae23c9
-    qemu_balloon_inhibit(false);
ae23c9
+    if (!group->balloon_allowed) {
ae23c9
+        qemu_balloon_inhibit(false);
ae23c9
+    }
ae23c9
     vfio_kvm_device_del_group(group);
ae23c9
     vfio_disconnect_container(group);
ae23c9
     QLIST_REMOVE(group, next);
ae23c9
@@ -1412,6 +1414,25 @@ int vfio_get_device(VFIOGroup *group, const char *name,
ae23c9
         return ret;
ae23c9
     }
ae23c9
 
ae23c9
+    /*
ae23c9
+     * Clear the balloon inhibitor for this group if the driver knows the
ae23c9
+     * device operates compatibly with ballooning.  Setting must be consistent
ae23c9
+     * per group, but since compatibility is really only possible with mdev
ae23c9
+     * currently, we expect singleton groups.
ae23c9
+     */
ae23c9
+    if (vbasedev->balloon_allowed != group->balloon_allowed) {
ae23c9
+        if (!QLIST_EMPTY(&group->device_list)) {
ae23c9
+            error_setg(errp,
ae23c9
+                       "Inconsistent device balloon setting within group");
ae23c9
+            return -1;
ae23c9
+        }
ae23c9
+
ae23c9
+        if (!group->balloon_allowed) {
ae23c9
+            group->balloon_allowed = true;
ae23c9
+            qemu_balloon_inhibit(false);
ae23c9
+        }
ae23c9
+    }
ae23c9
+
ae23c9
     vbasedev->fd = fd;
ae23c9
     vbasedev->group = group;
ae23c9
     QLIST_INSERT_HEAD(&group->device_list, vbasedev, next);
ae23c9
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
ae23c9
index 4683eb4..d43727f 100644
ae23c9
--- a/hw/vfio/pci.c
ae23c9
+++ b/hw/vfio/pci.c
ae23c9
@@ -2803,12 +2803,13 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
ae23c9
     VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev);
ae23c9
     VFIODevice *vbasedev_iter;
ae23c9
     VFIOGroup *group;
ae23c9
-    char *tmp, group_path[PATH_MAX], *group_name;
ae23c9
+    char *tmp, *subsys, group_path[PATH_MAX], *group_name;
ae23c9
     Error *err = NULL;
ae23c9
     ssize_t len;
ae23c9
     struct stat st;
ae23c9
     int groupid;
ae23c9
     int ret, i = 0;
ae23c9
+    bool is_mdev;
ae23c9
 
ae23c9
     QLIST_FOREACH(group, &vfio_group_list, next) {
ae23c9
         QLIST_FOREACH(vbasedev_iter, &group->device_list, next) {
ae23c9
@@ -2880,6 +2881,27 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
ae23c9
         }
ae23c9
     }
ae23c9
 
ae23c9
+    /*
ae23c9
+     * Mediated devices *might* operate compatibly with memory ballooning, but
ae23c9
+     * we cannot know for certain, it depends on whether the mdev vendor driver
ae23c9
+     * stays in sync with the active working set of the guest driver.  Prevent
ae23c9
+     * the x-balloon-allowed option unless this is minimally an mdev device.
ae23c9
+     */
ae23c9
+    tmp = g_strdup_printf("%s/subsystem", vdev->vbasedev.sysfsdev);
ae23c9
+    subsys = realpath(tmp, NULL);
ae23c9
+    g_free(tmp);
ae23c9
+    is_mdev = (strcmp(subsys, "/sys/bus/mdev") == 0);
ae23c9
+    free(subsys);
ae23c9
+
ae23c9
+    trace_vfio_mdev(vdev->vbasedev.name, is_mdev);
ae23c9
+
ae23c9
+    if (vdev->vbasedev.balloon_allowed && !is_mdev) {
ae23c9
+        error_setg(errp, "x-balloon-allowed only potentially compatible "
ae23c9
+                   "with mdev devices");
ae23c9
+        vfio_put_group(group);
ae23c9
+        goto error;
ae23c9
+    }
ae23c9
+
ae23c9
     ret = vfio_get_device(group, vdev->vbasedev.name, &vdev->vbasedev, errp);
ae23c9
     if (ret) {
ae23c9
         vfio_put_group(group);
ae23c9
@@ -3177,6 +3199,8 @@ static Property vfio_pci_dev_properties[] = {
ae23c9
     DEFINE_PROP_BIT("x-igd-opregion", VFIOPCIDevice, features,
ae23c9
                     VFIO_FEATURE_ENABLE_IGD_OPREGION_BIT, false),
ae23c9
     DEFINE_PROP_BOOL("x-no-mmap", VFIOPCIDevice, vbasedev.no_mmap, false),
ae23c9
+    DEFINE_PROP_BOOL("x-balloon-allowed", VFIOPCIDevice,
ae23c9
+                     vbasedev.balloon_allowed, false),
ae23c9
     DEFINE_PROP_BOOL("x-no-kvm-intx", VFIOPCIDevice, no_kvm_intx, false),
ae23c9
     DEFINE_PROP_BOOL("x-no-kvm-msi", VFIOPCIDevice, no_kvm_msi, false),
ae23c9
     DEFINE_PROP_BOOL("x-no-kvm-msix", VFIOPCIDevice, no_kvm_msix, false),
ae23c9
diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events
ae23c9
index 20109cb..9487887 100644
ae23c9
--- a/hw/vfio/trace-events
ae23c9
+++ b/hw/vfio/trace-events
ae23c9
@@ -39,6 +39,7 @@ vfio_pci_hot_reset_result(const char *name, const char *result) "%s hot reset: %
ae23c9
 vfio_populate_device_config(const char *name, unsigned long size, unsigned long offset, unsigned long flags) "Device %s config:\n  size: 0x%lx, offset: 0x%lx, flags: 0x%lx"
ae23c9
 vfio_populate_device_get_irq_info_failure(void) "VFIO_DEVICE_GET_IRQ_INFO failure: %m"
ae23c9
 vfio_realize(const char *name, int group_id) " (%s) group %d"
ae23c9
+vfio_mdev(const char *name, bool is_mdev) " (%s) is_mdev %d"
ae23c9
 vfio_add_ext_cap_dropped(const char *name, uint16_t cap, uint16_t offset) "%s 0x%x@0x%x"
ae23c9
 vfio_pci_reset(const char *name) " (%s)"
ae23c9
 vfio_pci_reset_flr(const char *name) "%s FLR/VFIO_DEVICE_RESET"
ae23c9
diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
ae23c9
index f29df6e..36ee657 100644
ae23c9
--- a/include/hw/vfio/vfio-common.h
ae23c9
+++ b/include/hw/vfio/vfio-common.h
ae23c9
@@ -123,6 +123,7 @@ typedef struct VFIODevice {
ae23c9
     bool reset_works;
ae23c9
     bool needs_reset;
ae23c9
     bool no_mmap;
ae23c9
+    bool balloon_allowed;
ae23c9
     VFIODeviceOps *ops;
ae23c9
     unsigned int num_irqs;
ae23c9
     unsigned int num_regions;
ae23c9
@@ -142,6 +143,7 @@ typedef struct VFIOGroup {
ae23c9
     QLIST_HEAD(, VFIODevice) device_list;
ae23c9
     QLIST_ENTRY(VFIOGroup) next;
ae23c9
     QLIST_ENTRY(VFIOGroup) container_next;
ae23c9
+    bool balloon_allowed;
ae23c9
 } VFIOGroup;
ae23c9
 
ae23c9
 typedef struct VFIODMABuf {
ae23c9
-- 
ae23c9
1.8.3.1
ae23c9