Blame SOURCES/kvm-vfio-ccw-pci-Allow-devices-to-opt-in-for-ballooning.patch

7711c0
From f353edbaafbd3b501495a240ae0d4d679c4ae929 Mon Sep 17 00:00:00 2001
7711c0
From: Alex Williamson <alex.williamson@redhat.com>
7711c0
Date: Mon, 3 Dec 2018 21:53:21 +0100
7711c0
Subject: [PATCH 21/34] vfio/ccw/pci: Allow devices to opt-in for ballooning
7711c0
7711c0
RH-Author: Alex Williamson <alex.williamson@redhat.com>
7711c0
Message-id: <154387400169.26945.14372894868026827700.stgit@gimli.home>
7711c0
Patchwork-id: 83230
7711c0
O-Subject: [RHEL-7.7 qemu-kvm-rhev PATCH 4/7] vfio/ccw/pci: Allow devices to opt-in for ballooning
7711c0
Bugzilla: 1619778
7711c0
RH-Acked-by: Peter Xu <peterx@redhat.com>
7711c0
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
7711c0
RH-Acked-by: Auger Eric <eric.auger@redhat.com>
7711c0
RH-Acked-by: David Hildenbrand <david@redhat.com>
7711c0
7711c0
Bugzilla: 1619778
7711c0
7711c0
If a vfio assigned device makes use of a physical IOMMU, then memory
7711c0
ballooning is necessarily inhibited due to the page pinning, lack of
7711c0
page level granularity at the IOMMU, and lack of sufficient notifiers to both
7711c0
remove the page on balloon inflation and add it back on deflation.
7711c0
However, not all devices are backed by a physical IOMMU.  In the case
7711c0
of mediated devices, if a vendor driver is well synchronized with the
7711c0
guest driver, such that only pages actively used by the guest driver
7711c0
are pinned by the host mdev vendor driver, then there should be no
7711c0
overlap between pages available for the balloon driver and pages
7711c0
actively in use by the device.  Under these conditions, ballooning
7711c0
should be safe.
7711c0
7711c0
vfio-ccw devices are always mediated devices and always operate under
7711c0
the constraints above.  Therefore we can consider all vfio-ccw devices
7711c0
as balloon compatible.
7711c0
7711c0
The situation is far from straightforward with vfio-pci.  These
7711c0
devices can be physical devices with physical IOMMU backing or
7711c0
mediated devices where it is unknown whether a physical IOMMU is in
7711c0
use or whether the vendor driver is well synchronized to the working
7711c0
set of the guest driver.  The safest approach is therefore to assume
7711c0
all vfio-pci devices are incompatible with ballooning, but allow user
7711c0
opt-in should they have further insight into mediated devices.
7711c0
7711c0
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
7711c0
(cherry picked from commit 238e91728503d400e1c4e644e3a9b80f9e621682)
7711c0
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
7711c0
---
7711c0
 hw/vfio/ccw.c                 |  9 +++++++++
7711c0
 hw/vfio/common.c              | 23 ++++++++++++++++++++++-
7711c0
 hw/vfio/pci.c                 | 26 +++++++++++++++++++++++++-
7711c0
 hw/vfio/trace-events          |  1 +
7711c0
 include/hw/vfio/vfio-common.h |  2 ++
7711c0
 5 files changed, 59 insertions(+), 2 deletions(-)
7711c0
7711c0
diff --git a/hw/vfio/ccw.c b/hw/vfio/ccw.c
7711c0
index fe34b50..0c74dda 100644
7711c0
--- a/hw/vfio/ccw.c
7711c0
+++ b/hw/vfio/ccw.c
7711c0
@@ -362,6 +362,15 @@ static void vfio_ccw_realize(DeviceState *dev, Error **errp)
7711c0
         }
7711c0
     }
7711c0
 
7711c0
+    /*
7711c0
+     * All vfio-ccw devices are believed to operate in a way compatible with
7711c0
+     * memory ballooning, ie. pages pinned in the host are in the current
7711c0
+     * working set of the guest driver and therefore never overlap with pages
7711c0
+     * available to the guest balloon driver.  This needs to be set before
7711c0
+     * vfio_get_device() for vfio common to handle the balloon inhibitor.
7711c0
+     */
7711c0
+    vcdev->vdev.balloon_allowed = true;
7711c0
+
7711c0
     if (vfio_get_device(group, cdev->mdevid, &vcdev->vdev, &err)) {
7711c0
         g_free(vcdev->vdev.name);
7711c0
         goto out_device_err;
7711c0
diff --git a/hw/vfio/common.c b/hw/vfio/common.c
7711c0
index 7e8f289..cda2d1f 100644
7711c0
--- a/hw/vfio/common.c
7711c0
+++ b/hw/vfio/common.c
7711c0
@@ -1376,7 +1376,9 @@ void vfio_put_group(VFIOGroup *group)
7711c0
         return;
7711c0
     }
7711c0
 
7711c0
-    qemu_balloon_inhibit(false);
7711c0
+    if (!group->balloon_allowed) {
7711c0
+        qemu_balloon_inhibit(false);
7711c0
+    }
7711c0
     vfio_kvm_device_del_group(group);
7711c0
     vfio_disconnect_container(group);
7711c0
     QLIST_REMOVE(group, next);
7711c0
@@ -1412,6 +1414,25 @@ int vfio_get_device(VFIOGroup *group, const char *name,
7711c0
         return ret;
7711c0
     }
7711c0
 
7711c0
+    /*
7711c0
+     * Clear the balloon inhibitor for this group if the driver knows the
7711c0
+     * device operates compatibly with ballooning.  Setting must be consistent
7711c0
+     * per group, but since compatibility is really only possible with mdev
7711c0
+     * currently, we expect singleton groups.
7711c0
+     */
7711c0
+    if (vbasedev->balloon_allowed != group->balloon_allowed) {
7711c0
+        if (!QLIST_EMPTY(&group->device_list)) {
7711c0
+            error_setg(errp,
7711c0
+                       "Inconsistent device balloon setting within group");
7711c0
+            return -1;
7711c0
+        }
7711c0
+
7711c0
+        if (!group->balloon_allowed) {
7711c0
+            group->balloon_allowed = true;
7711c0
+            qemu_balloon_inhibit(false);
7711c0
+        }
7711c0
+    }
7711c0
+
7711c0
     vbasedev->fd = fd;
7711c0
     vbasedev->group = group;
7711c0
     QLIST_INSERT_HEAD(&group->device_list, vbasedev, next);
7711c0
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
7711c0
index c00b91c..3bc7636 100644
7711c0
--- a/hw/vfio/pci.c
7711c0
+++ b/hw/vfio/pci.c
7711c0
@@ -2802,12 +2802,13 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
7711c0
     VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev);
7711c0
     VFIODevice *vbasedev_iter;
7711c0
     VFIOGroup *group;
7711c0
-    char *tmp, group_path[PATH_MAX], *group_name;
7711c0
+    char *tmp, *subsys, group_path[PATH_MAX], *group_name;
7711c0
     Error *err = NULL;
7711c0
     ssize_t len;
7711c0
     struct stat st;
7711c0
     int groupid;
7711c0
     int ret, i = 0;
7711c0
+    bool is_mdev;
7711c0
 
7711c0
     QLIST_FOREACH(group, &vfio_group_list, next) {
7711c0
         QLIST_FOREACH(vbasedev_iter, &group->device_list, next) {
7711c0
@@ -2879,6 +2880,27 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
7711c0
         }
7711c0
     }
7711c0
 
7711c0
+    /*
7711c0
+     * Mediated devices *might* operate compatibly with memory ballooning, but
7711c0
+     * we cannot know for certain, it depends on whether the mdev vendor driver
7711c0
+     * stays in sync with the active working set of the guest driver.  Prevent
7711c0
+     * the x-balloon-allowed option unless this is minimally an mdev device.
7711c0
+     */
7711c0
+    tmp = g_strdup_printf("%s/subsystem", vdev->vbasedev.sysfsdev);
7711c0
+    subsys = realpath(tmp, NULL);
7711c0
+    g_free(tmp);
7711c0
+    is_mdev = (strcmp(subsys, "/sys/bus/mdev") == 0);
7711c0
+    free(subsys);
7711c0
+
7711c0
+    trace_vfio_mdev(vdev->vbasedev.name, is_mdev);
7711c0
+
7711c0
+    if (vdev->vbasedev.balloon_allowed && !is_mdev) {
7711c0
+        error_setg(errp, "x-balloon-allowed only potentially compatible "
7711c0
+                   "with mdev devices");
7711c0
+        vfio_put_group(group);
7711c0
+        goto error;
7711c0
+    }
7711c0
+
7711c0
     ret = vfio_get_device(group, vdev->vbasedev.name, &vdev->vbasedev, errp);
7711c0
     if (ret) {
7711c0
         vfio_put_group(group);
7711c0
@@ -3176,6 +3198,8 @@ static Property vfio_pci_dev_properties[] = {
7711c0
     DEFINE_PROP_BIT("x-igd-opregion", VFIOPCIDevice, features,
7711c0
                     VFIO_FEATURE_ENABLE_IGD_OPREGION_BIT, false),
7711c0
     DEFINE_PROP_BOOL("x-no-mmap", VFIOPCIDevice, vbasedev.no_mmap, false),
7711c0
+    DEFINE_PROP_BOOL("x-balloon-allowed", VFIOPCIDevice,
7711c0
+                     vbasedev.balloon_allowed, false),
7711c0
     DEFINE_PROP_BOOL("x-no-kvm-intx", VFIOPCIDevice, no_kvm_intx, false),
7711c0
     DEFINE_PROP_BOOL("x-no-kvm-msi", VFIOPCIDevice, no_kvm_msi, false),
7711c0
     DEFINE_PROP_BOOL("x-no-kvm-msix", VFIOPCIDevice, no_kvm_msix, false),
7711c0
diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events
7711c0
index 20109cb..9487887 100644
7711c0
--- a/hw/vfio/trace-events
7711c0
+++ b/hw/vfio/trace-events
7711c0
@@ -39,6 +39,7 @@ vfio_pci_hot_reset_result(const char *name, const char *result) "%s hot reset: %
7711c0
 vfio_populate_device_config(const char *name, unsigned long size, unsigned long offset, unsigned long flags) "Device %s config:\n  size: 0x%lx, offset: 0x%lx, flags: 0x%lx"
7711c0
 vfio_populate_device_get_irq_info_failure(void) "VFIO_DEVICE_GET_IRQ_INFO failure: %m"
7711c0
 vfio_realize(const char *name, int group_id) " (%s) group %d"
7711c0
+vfio_mdev(const char *name, bool is_mdev) " (%s) is_mdev %d"
7711c0
 vfio_add_ext_cap_dropped(const char *name, uint16_t cap, uint16_t offset) "%s 0x%x@0x%x"
7711c0
 vfio_pci_reset(const char *name) " (%s)"
7711c0
 vfio_pci_reset_flr(const char *name) "%s FLR/VFIO_DEVICE_RESET"
7711c0
diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
7711c0
index d936014..c5c4cac 100644
7711c0
--- a/include/hw/vfio/vfio-common.h
7711c0
+++ b/include/hw/vfio/vfio-common.h
7711c0
@@ -122,6 +122,7 @@ typedef struct VFIODevice {
7711c0
     bool reset_works;
7711c0
     bool needs_reset;
7711c0
     bool no_mmap;
7711c0
+    bool balloon_allowed;
7711c0
     VFIODeviceOps *ops;
7711c0
     unsigned int num_irqs;
7711c0
     unsigned int num_regions;
7711c0
@@ -141,6 +142,7 @@ typedef struct VFIOGroup {
7711c0
     QLIST_HEAD(, VFIODevice) device_list;
7711c0
     QLIST_ENTRY(VFIOGroup) next;
7711c0
     QLIST_ENTRY(VFIOGroup) container_next;
7711c0
+    bool balloon_allowed;
7711c0
 } VFIOGroup;
7711c0
 
7711c0
 typedef struct VFIODMABuf {
7711c0
-- 
7711c0
1.8.3.1
7711c0