|
|
29b115 |
From d60774ee3168eefb21a4120a38107cd36ae17e07 Mon Sep 17 00:00:00 2001
|
|
|
29b115 |
From: Zhenzhong Duan <zhenzhong.duan@intel.com>
|
|
|
29b115 |
Date: Mon, 13 Jun 2022 14:10:08 +0800
|
|
|
29b115 |
Subject: [PATCH 01/17] virtio-iommu: Add bypass mode support to assigned
|
|
|
29b115 |
device
|
|
|
29b115 |
|
|
|
29b115 |
RH-Author: Eric Auger <eric.auger@redhat.com>
|
|
|
29b115 |
RH-MergeRequest: 105: virtio-iommu: Fix bypass mode for assigned devices
|
|
|
29b115 |
RH-Commit: [1/5] 4777815533b31c7f4f09af8902e378fd3fc1186a (eauger1/centos-qemu-kvm)
|
|
|
29b115 |
RH-Bugzilla: 2100106
|
|
|
29b115 |
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
|
|
29b115 |
RH-Acked-by: Peter Xu <peterx@redhat.com>
|
|
|
29b115 |
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
|
|
|
29b115 |
|
|
|
29b115 |
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2100106
|
|
|
29b115 |
|
|
|
29b115 |
Currently, assigned devices cannot work in virtio-iommu bypass mode.
|
|
|
29b115 |
The guest driver fails to probe the device due to DMA failure. The
|
|
|
29b115 |
reason is the lack of GPA -> HPA mappings when the VM is created.
|
|
|
29b115 |
|
|
|
29b115 |
Add a root container memory region to hold both the bypass memory region
|
|
|
29b115 |
and the iommu memory region, so that switching between them is supported
|
|
|
29b115 |
just like the implementation in virtual VT-d.
|
|
|
29b115 |
|
|
|
29b115 |
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
|
|
|
29b115 |
Message-Id: <20220613061010.2674054-2-zhenzhong.duan@intel.com>
|
|
|
29b115 |
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
|
|
|
29b115 |
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
|
|
|
29b115 |
(cherry picked from commit 90519b90539b16258d1d52b908b199f44877dc18)
|
|
|
29b115 |
Signed-off-by: Eric Auger <eric.auger@redhat.com>
|
|
|
29b115 |
---
|
|
|
29b115 |
hw/virtio/trace-events | 1 +
|
|
|
29b115 |
hw/virtio/virtio-iommu.c | 115 ++++++++++++++++++++++++++++++-
|
|
|
29b115 |
include/hw/virtio/virtio-iommu.h | 2 +
|
|
|
29b115 |
3 files changed, 116 insertions(+), 2 deletions(-)
|
|
|
29b115 |
|
|
|
29b115 |
diff --git a/hw/virtio/trace-events b/hw/virtio/trace-events
|
|
|
29b115 |
index a5102eac9e..2ab5881b88 100644
|
|
|
29b115 |
--- a/hw/virtio/trace-events
|
|
|
29b115 |
+++ b/hw/virtio/trace-events
|
|
|
29b115 |
@@ -114,6 +114,7 @@ virtio_iommu_remap(const char *name, uint64_t virt_start, uint64_t virt_end, uin
|
|
|
29b115 |
virtio_iommu_set_page_size_mask(const char *name, uint64_t old, uint64_t new) "mr=%s old_mask=0x%"PRIx64" new_mask=0x%"PRIx64
|
|
|
29b115 |
virtio_iommu_notify_flag_add(const char *name) "add notifier to mr %s"
|
|
|
29b115 |
virtio_iommu_notify_flag_del(const char *name) "del notifier from mr %s"
|
|
|
29b115 |
+virtio_iommu_switch_address_space(uint8_t bus, uint8_t slot, uint8_t fn, bool on) "Device %02x:%02x.%x switching address space (iommu enabled=%d)"
|
|
|
29b115 |
|
|
|
29b115 |
# virtio-mem.c
|
|
|
29b115 |
virtio_mem_send_response(uint16_t type) "type=%" PRIu16
|
|
|
29b115 |
diff --git a/hw/virtio/virtio-iommu.c b/hw/virtio/virtio-iommu.c
|
|
|
29b115 |
index 6d5ea0bdf1..5e99e6c62b 100644
|
|
|
29b115 |
--- a/hw/virtio/virtio-iommu.c
|
|
|
29b115 |
+++ b/hw/virtio/virtio-iommu.c
|
|
|
29b115 |
@@ -70,6 +70,77 @@ static inline uint16_t virtio_iommu_get_bdf(IOMMUDevice *dev)
|
|
|
29b115 |
return PCI_BUILD_BDF(pci_bus_num(dev->bus), dev->devfn);
|
|
|
29b115 |
}
|
|
|
29b115 |
|
|
|
29b115 |
+static bool virtio_iommu_device_bypassed(IOMMUDevice *sdev)
|
|
|
29b115 |
+{
|
|
|
29b115 |
+ uint32_t sid;
|
|
|
29b115 |
+ bool bypassed;
|
|
|
29b115 |
+ VirtIOIOMMU *s = sdev->viommu;
|
|
|
29b115 |
+ VirtIOIOMMUEndpoint *ep;
|
|
|
29b115 |
+
|
|
|
29b115 |
+ sid = virtio_iommu_get_bdf(sdev);
|
|
|
29b115 |
+
|
|
|
29b115 |
+ qemu_mutex_lock(&s->mutex);
|
|
|
29b115 |
+ /* need to check bypass before system reset */
|
|
|
29b115 |
+ if (!s->endpoints) {
|
|
|
29b115 |
+ bypassed = s->config.bypass;
|
|
|
29b115 |
+ goto unlock;
|
|
|
29b115 |
+ }
|
|
|
29b115 |
+
|
|
|
29b115 |
+ ep = g_tree_lookup(s->endpoints, GUINT_TO_POINTER(sid));
|
|
|
29b115 |
+ if (!ep || !ep->domain) {
|
|
|
29b115 |
+ bypassed = s->config.bypass;
|
|
|
29b115 |
+ } else {
|
|
|
29b115 |
+ bypassed = ep->domain->bypass;
|
|
|
29b115 |
+ }
|
|
|
29b115 |
+
|
|
|
29b115 |
+unlock:
|
|
|
29b115 |
+ qemu_mutex_unlock(&s->mutex);
|
|
|
29b115 |
+ return bypassed;
|
|
|
29b115 |
+}
|
|
|
29b115 |
+
|
|
|
29b115 |
+/* Return whether the device is using IOMMU translation. */
|
|
|
29b115 |
+static bool virtio_iommu_switch_address_space(IOMMUDevice *sdev)
|
|
|
29b115 |
+{
|
|
|
29b115 |
+ bool use_remapping;
|
|
|
29b115 |
+
|
|
|
29b115 |
+ assert(sdev);
|
|
|
29b115 |
+
|
|
|
29b115 |
+ use_remapping = !virtio_iommu_device_bypassed(sdev);
|
|
|
29b115 |
+
|
|
|
29b115 |
+ trace_virtio_iommu_switch_address_space(pci_bus_num(sdev->bus),
|
|
|
29b115 |
+ PCI_SLOT(sdev->devfn),
|
|
|
29b115 |
+ PCI_FUNC(sdev->devfn),
|
|
|
29b115 |
+ use_remapping);
|
|
|
29b115 |
+
|
|
|
29b115 |
+ /* Disable one region first, then enable the other */
|
|
|
29b115 |
+ if (use_remapping) {
|
|
|
29b115 |
+ memory_region_set_enabled(&sdev->bypass_mr, false);
|
|
|
29b115 |
+ memory_region_set_enabled(MEMORY_REGION(&sdev->iommu_mr), true);
|
|
|
29b115 |
+ } else {
|
|
|
29b115 |
+ memory_region_set_enabled(MEMORY_REGION(&sdev->iommu_mr), false);
|
|
|
29b115 |
+ memory_region_set_enabled(&sdev->bypass_mr, true);
|
|
|
29b115 |
+ }
|
|
|
29b115 |
+
|
|
|
29b115 |
+ return use_remapping;
|
|
|
29b115 |
+}
|
|
|
29b115 |
+
|
|
|
29b115 |
+static void virtio_iommu_switch_address_space_all(VirtIOIOMMU *s)
|
|
|
29b115 |
+{
|
|
|
29b115 |
+ GHashTableIter iter;
|
|
|
29b115 |
+ IOMMUPciBus *iommu_pci_bus;
|
|
|
29b115 |
+ int i;
|
|
|
29b115 |
+
|
|
|
29b115 |
+ g_hash_table_iter_init(&iter, s->as_by_busptr);
|
|
|
29b115 |
+ while (g_hash_table_iter_next(&iter, NULL, (void **)&iommu_pci_bus)) {
|
|
|
29b115 |
+ for (i = 0; i < PCI_DEVFN_MAX; i++) {
|
|
|
29b115 |
+ if (!iommu_pci_bus->pbdev[i]) {
|
|
|
29b115 |
+ continue;
|
|
|
29b115 |
+ }
|
|
|
29b115 |
+ virtio_iommu_switch_address_space(iommu_pci_bus->pbdev[i]);
|
|
|
29b115 |
+ }
|
|
|
29b115 |
+ }
|
|
|
29b115 |
+}
|
|
|
29b115 |
+
|
|
|
29b115 |
/**
|
|
|
29b115 |
* The bus number is used for lookup when SID based operations occur.
|
|
|
29b115 |
* In that case we lazily populate the IOMMUPciBus array from the bus hash
|
|
|
29b115 |
@@ -214,6 +285,7 @@ static gboolean virtio_iommu_notify_map_cb(gpointer key, gpointer value,
|
|
|
29b115 |
static void virtio_iommu_detach_endpoint_from_domain(VirtIOIOMMUEndpoint *ep)
|
|
|
29b115 |
{
|
|
|
29b115 |
VirtIOIOMMUDomain *domain = ep->domain;
|
|
|
29b115 |
+ IOMMUDevice *sdev = container_of(ep->iommu_mr, IOMMUDevice, iommu_mr);
|
|
|
29b115 |
|
|
|
29b115 |
if (!ep->domain) {
|
|
|
29b115 |
return;
|
|
|
29b115 |
@@ -222,6 +294,7 @@ static void virtio_iommu_detach_endpoint_from_domain(VirtIOIOMMUEndpoint *ep)
|
|
|
29b115 |
ep->iommu_mr);
|
|
|
29b115 |
QLIST_REMOVE(ep, next);
|
|
|
29b115 |
ep->domain = NULL;
|
|
|
29b115 |
+ virtio_iommu_switch_address_space(sdev);
|
|
|
29b115 |
}
|
|
|
29b115 |
|
|
|
29b115 |
static VirtIOIOMMUEndpoint *virtio_iommu_get_endpoint(VirtIOIOMMU *s,
|
|
|
29b115 |
@@ -324,12 +397,39 @@ static AddressSpace *virtio_iommu_find_add_as(PCIBus *bus, void *opaque,
|
|
|
29b115 |
|
|
|
29b115 |
trace_virtio_iommu_init_iommu_mr(name);
|
|
|
29b115 |
|
|
|
29b115 |
+ memory_region_init(&sdev->root, OBJECT(s), name, UINT64_MAX);
|
|
|
29b115 |
+ address_space_init(&sdev->as, &sdev->root, TYPE_VIRTIO_IOMMU);
|
|
|
29b115 |
+
|
|
|
29b115 |
+ /*
|
|
|
29b115 |
+ * Build the IOMMU disabled container with aliases to the
|
|
|
29b115 |
+ * shared MRs. Note that aliasing to a shared memory region
|
|
|
29b115 |
+ * could help the memory API to detect same FlatViews so we
|
|
|
29b115 |
+ * can have devices to share the same FlatView when in bypass
|
|
|
29b115 |
+ * mode (either by not configuring the virtio-iommu driver or with
|
|
|
29b115 |
+ * "iommu=pt"). It will greatly reduce the total number of
|
|
|
29b115 |
+ * FlatViews in the system, hence the VM runs faster.
|
|
|
29b115 |
+ */
|
|
|
29b115 |
+ memory_region_init_alias(&sdev->bypass_mr, OBJECT(s),
|
|
|
29b115 |
+ "system", get_system_memory(), 0,
|
|
|
29b115 |
+ memory_region_size(get_system_memory()));
|
|
|
29b115 |
+
|
|
|
29b115 |
memory_region_init_iommu(&sdev->iommu_mr, sizeof(sdev->iommu_mr),
|
|
|
29b115 |
TYPE_VIRTIO_IOMMU_MEMORY_REGION,
|
|
|
29b115 |
OBJECT(s), name,
|
|
|
29b115 |
UINT64_MAX);
|
|
|
29b115 |
- address_space_init(&sdev->as,
|
|
|
29b115 |
- MEMORY_REGION(&sdev->iommu_mr), TYPE_VIRTIO_IOMMU);
|
|
|
29b115 |
+
|
|
|
29b115 |
+ /*
|
|
|
29b115 |
+ * Hook both the containers under the root container; we
|
|
|
29b115 |
+ * switch between iommu & bypass MRs by enabling/disabling the
|
|
|
29b115 |
+ * corresponding sub-containers
|
|
|
29b115 |
+ */
|
|
|
29b115 |
+ memory_region_add_subregion_overlap(&sdev->root, 0,
|
|
|
29b115 |
+ MEMORY_REGION(&sdev->iommu_mr),
|
|
|
29b115 |
+ 0);
|
|
|
29b115 |
+ memory_region_add_subregion_overlap(&sdev->root, 0,
|
|
|
29b115 |
+ &sdev->bypass_mr, 0);
|
|
|
29b115 |
+
|
|
|
29b115 |
+ virtio_iommu_switch_address_space(sdev);
|
|
|
29b115 |
g_free(name);
|
|
|
29b115 |
}
|
|
|
29b115 |
return &sdev->as;
|
|
|
29b115 |
@@ -343,6 +443,7 @@ static int virtio_iommu_attach(VirtIOIOMMU *s,
|
|
|
29b115 |
uint32_t flags = le32_to_cpu(req->flags);
|
|
|
29b115 |
VirtIOIOMMUDomain *domain;
|
|
|
29b115 |
VirtIOIOMMUEndpoint *ep;
|
|
|
29b115 |
+ IOMMUDevice *sdev;
|
|
|
29b115 |
|
|
|
29b115 |
trace_virtio_iommu_attach(domain_id, ep_id);
|
|
|
29b115 |
|
|
|
29b115 |
@@ -376,6 +477,8 @@ static int virtio_iommu_attach(VirtIOIOMMU *s,
|
|
|
29b115 |
QLIST_INSERT_HEAD(&domain->endpoint_list, ep, next);
|
|
|
29b115 |
|
|
|
29b115 |
ep->domain = domain;
|
|
|
29b115 |
+ sdev = container_of(ep->iommu_mr, IOMMUDevice, iommu_mr);
|
|
|
29b115 |
+ virtio_iommu_switch_address_space(sdev);
|
|
|
29b115 |
|
|
|
29b115 |
/* Replay domain mappings on the associated memory region */
|
|
|
29b115 |
g_tree_foreach(domain->mappings, virtio_iommu_notify_map_cb,
|
|
|
29b115 |
@@ -888,6 +991,7 @@ static void virtio_iommu_set_config(VirtIODevice *vdev,
|
|
|
29b115 |
return;
|
|
|
29b115 |
}
|
|
|
29b115 |
dev_config->bypass = in_config->bypass;
|
|
|
29b115 |
+ virtio_iommu_switch_address_space_all(dev);
|
|
|
29b115 |
}
|
|
|
29b115 |
|
|
|
29b115 |
trace_virtio_iommu_set_config(in_config->bypass);
|
|
|
29b115 |
@@ -1027,6 +1131,8 @@ static void virtio_iommu_system_reset(void *opaque)
|
|
|
29b115 |
* system reset
|
|
|
29b115 |
*/
|
|
|
29b115 |
s->config.bypass = s->boot_bypass;
|
|
|
29b115 |
+ virtio_iommu_switch_address_space_all(s);
|
|
|
29b115 |
+
|
|
|
29b115 |
}
|
|
|
29b115 |
|
|
|
29b115 |
static void virtio_iommu_device_realize(DeviceState *dev, Error **errp)
|
|
|
29b115 |
@@ -1043,6 +1149,11 @@ static void virtio_iommu_device_realize(DeviceState *dev, Error **errp)
|
|
|
29b115 |
virtio_iommu_handle_command);
|
|
|
29b115 |
s->event_vq = virtio_add_queue(vdev, VIOMMU_DEFAULT_QUEUE_SIZE, NULL);
|
|
|
29b115 |
|
|
|
29b115 |
+ /*
|
|
|
29b115 |
+ * config.bypass is needed to get initial address space early, such as
|
|
|
29b115 |
+ * in vfio realize
|
|
|
29b115 |
+ */
|
|
|
29b115 |
+ s->config.bypass = s->boot_bypass;
|
|
|
29b115 |
s->config.page_size_mask = TARGET_PAGE_MASK;
|
|
|
29b115 |
s->config.input_range.end = UINT64_MAX;
|
|
|
29b115 |
s->config.domain_range.end = UINT32_MAX;
|
|
|
29b115 |
diff --git a/include/hw/virtio/virtio-iommu.h b/include/hw/virtio/virtio-iommu.h
|
|
|
29b115 |
index 84391f8448..102eeefa73 100644
|
|
|
29b115 |
--- a/include/hw/virtio/virtio-iommu.h
|
|
|
29b115 |
+++ b/include/hw/virtio/virtio-iommu.h
|
|
|
29b115 |
@@ -37,6 +37,8 @@ typedef struct IOMMUDevice {
|
|
|
29b115 |
int devfn;
|
|
|
29b115 |
IOMMUMemoryRegion iommu_mr;
|
|
|
29b115 |
AddressSpace as;
|
|
|
29b115 |
+ MemoryRegion root; /* The root container of the device */
|
|
|
29b115 |
+ MemoryRegion bypass_mr; /* The alias of shared memory MR */
|
|
|
29b115 |
} IOMMUDevice;
|
|
|
29b115 |
|
|
|
29b115 |
typedef struct IOMMUPciBus {
|
|
|
29b115 |
--
|
|
|
29b115 |
2.31.1
|
|
|
29b115 |
|