|
|
26ba25 |
From f37a1e337dd62c873f18aabd31863c8df144c7ea Mon Sep 17 00:00:00 2001
|
|
|
26ba25 |
From: Alex Williamson <alex.williamson@redhat.com>
|
|
|
26ba25 |
Date: Mon, 3 Dec 2018 22:01:54 +0000
|
|
|
26ba25 |
Subject: [PATCH 13/16] vfio/ccw/pci: Allow devices to opt-in for ballooning
|
|
|
26ba25 |
|
|
|
26ba25 |
RH-Author: Alex Williamson <alex.williamson@redhat.com>
|
|
|
26ba25 |
Message-id: <154387451469.27651.8657130146789267501.stgit@gimli.home>
|
|
|
26ba25 |
Patchwork-id: 83236
|
|
|
26ba25 |
O-Subject: [RHEL-8.0 qemu-kvm PATCH 4/7] vfio/ccw/pci: Allow devices to opt-in for ballooning
|
|
|
26ba25 |
Bugzilla: 1650272
|
|
|
26ba25 |
RH-Acked-by: Peter Xu <peterx@redhat.com>
|
|
|
26ba25 |
RH-Acked-by: Auger Eric <eric.auger@redhat.com>
|
|
|
26ba25 |
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
|
|
|
26ba25 |
RH-Acked-by: David Hildenbrand <david@redhat.com>
|
|
|
26ba25 |
|
|
|
26ba25 |
Bugzilla: 1650272
|
|
|
26ba25 |
|
|
|
26ba25 |
If a vfio assigned device makes use of a physical IOMMU, then memory
|
|
|
26ba25 |
ballooning is necessarily inhibited due to the page pinning, lack of
|
|
|
26ba25 |
page level granularity at the IOMMU, and lack of sufficient notifiers to both
|
|
|
26ba25 |
remove the page on balloon inflation and add it back on deflation.
|
|
|
26ba25 |
However, not all devices are backed by a physical IOMMU. In the case
|
|
|
26ba25 |
of mediated devices, if a vendor driver is well synchronized with the
|
|
|
26ba25 |
guest driver, such that only pages actively used by the guest driver
|
|
|
26ba25 |
are pinned by the host mdev vendor driver, then there should be no
|
|
|
26ba25 |
overlap between pages available for the balloon driver and pages
|
|
|
26ba25 |
actively in use by the device. Under these conditions, ballooning
|
|
|
26ba25 |
should be safe.
|
|
|
26ba25 |
|
|
|
26ba25 |
vfio-ccw devices are always mediated devices and always operate under
|
|
|
26ba25 |
the constraints above. Therefore we can consider all vfio-ccw devices
|
|
|
26ba25 |
as balloon compatible.
|
|
|
26ba25 |
|
|
|
26ba25 |
The situation is far from straightforward with vfio-pci. These
|
|
|
26ba25 |
devices can be physical devices with physical IOMMU backing or
|
|
|
26ba25 |
mediated devices where it is unknown whether a physical IOMMU is in
|
|
|
26ba25 |
use or whether the vendor driver is well synchronized to the working
|
|
|
26ba25 |
set of the guest driver. The safest approach is therefore to assume
|
|
|
26ba25 |
all vfio-pci devices are incompatible with ballooning, but allow user
|
|
|
26ba25 |
opt-in should they have further insight into mediated devices.
|
|
|
26ba25 |
|
|
|
26ba25 |
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
|
|
|
26ba25 |
(cherry picked from commit 238e91728503d400e1c4e644e3a9b80f9e621682)
|
|
|
26ba25 |
Signed-off-by: Danilo C. L. de Paula <ddepaula@redhat.com>
|
|
|
26ba25 |
---
|
|
|
26ba25 |
hw/vfio/ccw.c | 9 +++++++++
|
|
|
26ba25 |
hw/vfio/common.c | 23 ++++++++++++++++++++++-
|
|
|
26ba25 |
hw/vfio/pci.c | 26 +++++++++++++++++++++++++-
|
|
|
26ba25 |
hw/vfio/trace-events | 1 +
|
|
|
26ba25 |
include/hw/vfio/vfio-common.h | 2 ++
|
|
|
26ba25 |
5 files changed, 59 insertions(+), 2 deletions(-)
|
|
|
26ba25 |
|
|
|
26ba25 |
diff --git a/hw/vfio/ccw.c b/hw/vfio/ccw.c
|
|
|
26ba25 |
index fe34b50..0c74dda 100644
|
|
|
26ba25 |
--- a/hw/vfio/ccw.c
|
|
|
26ba25 |
+++ b/hw/vfio/ccw.c
|
|
|
26ba25 |
@@ -362,6 +362,15 @@ static void vfio_ccw_realize(DeviceState *dev, Error **errp)
|
|
|
26ba25 |
}
|
|
|
26ba25 |
}
|
|
|
26ba25 |
|
|
|
26ba25 |
+ /*
|
|
|
26ba25 |
+ * All vfio-ccw devices are believed to operate in a way compatible with
|
|
|
26ba25 |
+ * memory ballooning, ie. pages pinned in the host are in the current
|
|
|
26ba25 |
+ * working set of the guest driver and therefore never overlap with pages
|
|
|
26ba25 |
+ * available to the guest balloon driver. This needs to be set before
|
|
|
26ba25 |
+ * vfio_get_device() for vfio common to handle the balloon inhibitor.
|
|
|
26ba25 |
+ */
|
|
|
26ba25 |
+ vcdev->vdev.balloon_allowed = true;
|
|
|
26ba25 |
+
|
|
|
26ba25 |
if (vfio_get_device(group, cdev->mdevid, &vcdev->vdev, &err)) {
|
|
|
26ba25 |
g_free(vcdev->vdev.name);
|
|
|
26ba25 |
goto out_device_err;
|
|
|
26ba25 |
diff --git a/hw/vfio/common.c b/hw/vfio/common.c
|
|
|
26ba25 |
index 7e8f289..cda2d1f 100644
|
|
|
26ba25 |
--- a/hw/vfio/common.c
|
|
|
26ba25 |
+++ b/hw/vfio/common.c
|
|
|
26ba25 |
@@ -1376,7 +1376,9 @@ void vfio_put_group(VFIOGroup *group)
|
|
|
26ba25 |
return;
|
|
|
26ba25 |
}
|
|
|
26ba25 |
|
|
|
26ba25 |
- qemu_balloon_inhibit(false);
|
|
|
26ba25 |
+ if (!group->balloon_allowed) {
|
|
|
26ba25 |
+ qemu_balloon_inhibit(false);
|
|
|
26ba25 |
+ }
|
|
|
26ba25 |
vfio_kvm_device_del_group(group);
|
|
|
26ba25 |
vfio_disconnect_container(group);
|
|
|
26ba25 |
QLIST_REMOVE(group, next);
|
|
|
26ba25 |
@@ -1412,6 +1414,25 @@ int vfio_get_device(VFIOGroup *group, const char *name,
|
|
|
26ba25 |
return ret;
|
|
|
26ba25 |
}
|
|
|
26ba25 |
|
|
|
26ba25 |
+ /*
|
|
|
26ba25 |
+ * Clear the balloon inhibitor for this group if the driver knows the
|
|
|
26ba25 |
+ * device operates compatibly with ballooning. Setting must be consistent
|
|
|
26ba25 |
+ * per group, but since compatibility is really only possible with mdev
|
|
|
26ba25 |
+ * currently, we expect singleton groups.
|
|
|
26ba25 |
+ */
|
|
|
26ba25 |
+ if (vbasedev->balloon_allowed != group->balloon_allowed) {
|
|
|
26ba25 |
+ if (!QLIST_EMPTY(&group->device_list)) {
|
|
|
26ba25 |
+ error_setg(errp,
|
|
|
26ba25 |
+ "Inconsistent device balloon setting within group");
|
|
|
26ba25 |
+ return -1;
|
|
|
26ba25 |
+ }
|
|
|
26ba25 |
+
|
|
|
26ba25 |
+ if (!group->balloon_allowed) {
|
|
|
26ba25 |
+ group->balloon_allowed = true;
|
|
|
26ba25 |
+ qemu_balloon_inhibit(false);
|
|
|
26ba25 |
+ }
|
|
|
26ba25 |
+ }
|
|
|
26ba25 |
+
|
|
|
26ba25 |
vbasedev->fd = fd;
|
|
|
26ba25 |
vbasedev->group = group;
|
|
|
26ba25 |
QLIST_INSERT_HEAD(&group->device_list, vbasedev, next);
|
|
|
26ba25 |
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
|
|
|
26ba25 |
index 4683eb4..d43727f 100644
|
|
|
26ba25 |
--- a/hw/vfio/pci.c
|
|
|
26ba25 |
+++ b/hw/vfio/pci.c
|
|
|
26ba25 |
@@ -2803,12 +2803,13 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
|
|
|
26ba25 |
VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev);
|
|
|
26ba25 |
VFIODevice *vbasedev_iter;
|
|
|
26ba25 |
VFIOGroup *group;
|
|
|
26ba25 |
- char *tmp, group_path[PATH_MAX], *group_name;
|
|
|
26ba25 |
+ char *tmp, *subsys, group_path[PATH_MAX], *group_name;
|
|
|
26ba25 |
Error *err = NULL;
|
|
|
26ba25 |
ssize_t len;
|
|
|
26ba25 |
struct stat st;
|
|
|
26ba25 |
int groupid;
|
|
|
26ba25 |
int ret, i = 0;
|
|
|
26ba25 |
+ bool is_mdev;
|
|
|
26ba25 |
|
|
|
26ba25 |
QLIST_FOREACH(group, &vfio_group_list, next) {
|
|
|
26ba25 |
QLIST_FOREACH(vbasedev_iter, &group->device_list, next) {
|
|
|
26ba25 |
@@ -2880,6 +2881,27 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
|
|
|
26ba25 |
}
|
|
|
26ba25 |
}
|
|
|
26ba25 |
|
|
|
26ba25 |
+ /*
|
|
|
26ba25 |
+ * Mediated devices *might* operate compatibly with memory ballooning, but
|
|
|
26ba25 |
+ * we cannot know for certain, it depends on whether the mdev vendor driver
|
|
|
26ba25 |
+ * stays in sync with the active working set of the guest driver. Prevent
|
|
|
26ba25 |
+ * the x-balloon-allowed option unless this is minimally an mdev device.
|
|
|
26ba25 |
+ */
|
|
|
26ba25 |
+ tmp = g_strdup_printf("%s/subsystem", vdev->vbasedev.sysfsdev);
|
|
|
26ba25 |
+ subsys = realpath(tmp, NULL);
|
|
|
26ba25 |
+ g_free(tmp);
|
|
|
26ba25 |
+ is_mdev = (strcmp(subsys, "/sys/bus/mdev") == 0);
|
|
|
26ba25 |
+ free(subsys);
|
|
|
26ba25 |
+
|
|
|
26ba25 |
+ trace_vfio_mdev(vdev->vbasedev.name, is_mdev);
|
|
|
26ba25 |
+
|
|
|
26ba25 |
+ if (vdev->vbasedev.balloon_allowed && !is_mdev) {
|
|
|
26ba25 |
+ error_setg(errp, "x-balloon-allowed only potentially compatible "
|
|
|
26ba25 |
+ "with mdev devices");
|
|
|
26ba25 |
+ vfio_put_group(group);
|
|
|
26ba25 |
+ goto error;
|
|
|
26ba25 |
+ }
|
|
|
26ba25 |
+
|
|
|
26ba25 |
ret = vfio_get_device(group, vdev->vbasedev.name, &vdev->vbasedev, errp);
|
|
|
26ba25 |
if (ret) {
|
|
|
26ba25 |
vfio_put_group(group);
|
|
|
26ba25 |
@@ -3177,6 +3199,8 @@ static Property vfio_pci_dev_properties[] = {
|
|
|
26ba25 |
DEFINE_PROP_BIT("x-igd-opregion", VFIOPCIDevice, features,
|
|
|
26ba25 |
VFIO_FEATURE_ENABLE_IGD_OPREGION_BIT, false),
|
|
|
26ba25 |
DEFINE_PROP_BOOL("x-no-mmap", VFIOPCIDevice, vbasedev.no_mmap, false),
|
|
|
26ba25 |
+ DEFINE_PROP_BOOL("x-balloon-allowed", VFIOPCIDevice,
|
|
|
26ba25 |
+ vbasedev.balloon_allowed, false),
|
|
|
26ba25 |
DEFINE_PROP_BOOL("x-no-kvm-intx", VFIOPCIDevice, no_kvm_intx, false),
|
|
|
26ba25 |
DEFINE_PROP_BOOL("x-no-kvm-msi", VFIOPCIDevice, no_kvm_msi, false),
|
|
|
26ba25 |
DEFINE_PROP_BOOL("x-no-kvm-msix", VFIOPCIDevice, no_kvm_msix, false),
|
|
|
26ba25 |
diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events
|
|
|
26ba25 |
index 20109cb..9487887 100644
|
|
|
26ba25 |
--- a/hw/vfio/trace-events
|
|
|
26ba25 |
+++ b/hw/vfio/trace-events
|
|
|
26ba25 |
@@ -39,6 +39,7 @@ vfio_pci_hot_reset_result(const char *name, const char *result) "%s hot reset: %
|
|
|
26ba25 |
vfio_populate_device_config(const char *name, unsigned long size, unsigned long offset, unsigned long flags) "Device %s config:\n size: 0x%lx, offset: 0x%lx, flags: 0x%lx"
|
|
|
26ba25 |
vfio_populate_device_get_irq_info_failure(void) "VFIO_DEVICE_GET_IRQ_INFO failure: %m"
|
|
|
26ba25 |
vfio_realize(const char *name, int group_id) " (%s) group %d"
|
|
|
26ba25 |
+vfio_mdev(const char *name, bool is_mdev) " (%s) is_mdev %d"
|
|
|
26ba25 |
vfio_add_ext_cap_dropped(const char *name, uint16_t cap, uint16_t offset) "%s 0x%x@0x%x"
|
|
|
26ba25 |
vfio_pci_reset(const char *name) " (%s)"
|
|
|
26ba25 |
vfio_pci_reset_flr(const char *name) "%s FLR/VFIO_DEVICE_RESET"
|
|
|
26ba25 |
diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
|
|
|
26ba25 |
index f29df6e..36ee657 100644
|
|
|
26ba25 |
--- a/include/hw/vfio/vfio-common.h
|
|
|
26ba25 |
+++ b/include/hw/vfio/vfio-common.h
|
|
|
26ba25 |
@@ -123,6 +123,7 @@ typedef struct VFIODevice {
|
|
|
26ba25 |
bool reset_works;
|
|
|
26ba25 |
bool needs_reset;
|
|
|
26ba25 |
bool no_mmap;
|
|
|
26ba25 |
+ bool balloon_allowed;
|
|
|
26ba25 |
VFIODeviceOps *ops;
|
|
|
26ba25 |
unsigned int num_irqs;
|
|
|
26ba25 |
unsigned int num_regions;
|
|
|
26ba25 |
@@ -142,6 +143,7 @@ typedef struct VFIOGroup {
|
|
|
26ba25 |
QLIST_HEAD(, VFIODevice) device_list;
|
|
|
26ba25 |
QLIST_ENTRY(VFIOGroup) next;
|
|
|
26ba25 |
QLIST_ENTRY(VFIOGroup) container_next;
|
|
|
26ba25 |
+ bool balloon_allowed;
|
|
|
26ba25 |
} VFIOGroup;
|
|
|
26ba25 |
|
|
|
26ba25 |
typedef struct VFIODMABuf {
|
|
|
26ba25 |
--
|
|
|
26ba25 |
1.8.3.1
|
|
|
26ba25 |
|